Compare commits
	
		
			731 Commits
		
	
	
		
			nilmdb-ori
			...
			b6bba16505
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| b6bba16505 | |||
| d4003d0d34 | |||
| 759492298a | |||
| b5f6fcc253 | |||
| 905e325ded | |||
| 648b6f4b70 | |||
| 7f8a2c7027 | |||
| 276fbc652a | |||
| 10b34f5937 | |||
| 83daeb148a | |||
| d65f00e8b2 | |||
| 71dc01c9a7 | |||
| bcd21b3498 | |||
| a1dee0e6f2 | |||
| 99ac47cf0d | |||
| 4cdaef51c1 | |||
| 88466dcafe | |||
| 8dfb8da15c | |||
| 6cc1f6b7b2 | |||
| 8dc36c2d37 | |||
| 3738430103 | |||
| a41111b045 | |||
| 85f822e1c4 | |||
| 0222dfebf0 | |||
| 70914690c1 | |||
| 10400f2b07 | |||
| 56153ff7ad | |||
| 671f87b047 | |||
| 2f2faeeab7 | |||
| 2ed544bd30 | |||
| 6821b2a97b | |||
| b20bb92988 | |||
| 699de7b11f | |||
| ea67e45be9 | |||
| ca440a42bd | |||
| 4ff4b263b4 | |||
| 79e544c733 | |||
| 9acf99ff25 | |||
| 4958a5ab2e | |||
| f2d89e2da5 | |||
| 1952f245c0 | |||
| 7cbc0c11c3 | |||
| 9f2651c35e | |||
| 9126980ed4 | |||
| ea051c85b3 | |||
| d8294469cf | |||
| 96eadb0577 | |||
| fb524c649f | |||
| 19a34a07a4 | |||
| d8df6f515f | |||
| 90ee127c87 | |||
| 0b631b7dea | |||
| f587518adb | |||
| efbb2665fe | |||
| 544413018c | |||
| 322b0ec423 | |||
| f3833d9b20 | |||
| 735c8497af | |||
| 7252e40c2d | |||
| caa5604d81 | |||
| 6624e8dab6 | |||
| d907638858 | |||
| 39e66fe38c | |||
| ba915bb290 | |||
| 3f0b8e50a2 | |||
| f93edc469c | |||
| 087fb39475 | |||
| 8b4acf41d6 | |||
| 32a76ccf3f | |||
| 5f9367bdd3 | |||
| 5848d03507 | |||
| 36dc448f02 | |||
| 2764283f59 | |||
| 2d0c3f7868 | |||
| cadba9fbba | |||
| 2d200a86c9 | |||
| 640c1bc95e | |||
| b574fc86f4 | |||
| 02ee18c410 | |||
| d1e241a213 | |||
| c5c7f638e7 | |||
| a1218fd20b | |||
| c58a933d21 | |||
| 7874e1ebfa | |||
| 79b410a85b | |||
| 6645395924 | |||
| beb3eadd38 | |||
| edf4568e8f | |||
| a962258b2a | |||
| fa011559c1 | |||
| 349eec3942 | |||
| 99500f3a88 | |||
| 54eccb17aa | |||
| cc8ac74a37 | |||
| 3be904d158 | |||
| 5d9fc5500c | |||
| 57751f5b32 | |||
| 1c005518d8 | |||
| 3279f7ef2c | |||
| a2e124f444 | |||
| 6d673bd2be | |||
| 613a3185e3 | |||
| c83ee65cf7 | |||
| 113633459d | |||
| 41abf53085 | |||
| fef3e1d31e | |||
| 02db87eee6 | |||
| ad85c3dd29 | |||
| 0e6ccd687b | |||
| 85d4c419fd | |||
| 159278066c | |||
| b69358a185 | |||
| e82ef60e2e | |||
| 911d9bc284 | |||
| 752a9b36ae | |||
| 97d17de8ad | |||
| 5da7e6558e | |||
| 1928caa1d7 | |||
| 5db034432c | |||
| 55119a3e07 | |||
| a9eff10dbf | |||
| 0f5c1c0db6 | |||
| d17365ca37 | |||
| 8125d9c840 | |||
| ba55ad82f0 | |||
| 45c81d2019 | |||
| 78cfda32e3 | |||
| 3658d3876b | |||
| 022b50950f | |||
| e5efbadc8e | |||
| 74f633c9da | |||
| ab9a327130 | |||
| da72fc9777 | |||
| a01cb4132d | |||
| 7c3da2fe44 | |||
| f0e06dc436 | |||
| ddc0eb4264 | |||
| 0a22db3965 | |||
| 8bb8f068de | |||
| 416902097d | |||
| f5276e9fc8 | |||
| c47f28f93a | |||
| 63b5f99b90 | |||
| 7d7b89b52f | |||
| 8d249273c6 | |||
| abe431c663 | |||
| ccf1f695af | |||
| 06f7390c9e | |||
| 6de77a08f1 | |||
| 8db9771c20 | |||
| 04f815a24b | |||
| 6868f5f126 | |||
| ca0943ec19 | |||
| 68addb4e4a | |||
| 68c33b1f14 | |||
| 8dd8741100 | |||
| 8e6341ae5d | |||
| 422b1e2df2 | |||
| 0f745b3047 | |||
| 71cd7ed9b7 | |||
| a79d6104d5 | |||
| 8e8ec59e30 | |||
| b89b945a0f | |||
| bd7bdb2eb8 | |||
| 840cd2fd13 | |||
| bbd59c8b50 | |||
| 405c110fd7 | |||
| 274adcd856 | |||
| a1850c9c2c | |||
| 6cd28b67b1 | |||
| d6d215d53d | |||
| e02143ddb2 | |||
| e275384d03 | |||
| a6a67ec15c | |||
| fc43107307 | |||
| 90633413bb | |||
| c7c3aff0fb | |||
| e2347c954e | |||
| 222a5c6c53 | |||
| 1ca2c143e5 | |||
| b5df575c79 | |||
| 2768a5ad15 | |||
| a105543c38 | |||
| 309f38d0ed | |||
| 9a27b6ef6a | |||
| 99532cf9e0 | |||
| dfdd0e5c74 | |||
| 9a2699adfc | |||
| 9bbb95b18b | |||
| 6bbed322c5 | |||
| 2317894355 | |||
| 539c92226c | |||
| 77c766d85d | |||
| 49d04db1d6 | |||
| ea838d05ae | |||
| f2a48bdb2a | |||
| 6d14e0b8aa | |||
| b31b9327b9 | |||
| b98ff1331a | |||
| 00e6ba1124 | |||
| 01029230c9 | |||
| ecc4e5ef9d | |||
| 23f31c472b | |||
| a1e2746360 | |||
| 1c40d59a52 | |||
| bfb09a189f | |||
| 416a499866 | |||
| 637d193807 | |||
| b7fa5745ce | |||
| 0104c8edd9 | |||
| cf3b8e787d | |||
| 83d022016c | |||
| 43b740ecaa | |||
| 4ce059b920 | |||
| 99a4228285 | |||
| 230ec72609 | |||
| d36ece3767 | |||
| 231963538e | |||
| b4d6aad6de | |||
| e95142eabf | |||
| d21c3470bc | |||
| 7576883f49 | |||
| cc211542f8 | |||
| 8292dcf70b | |||
| b362fd37f6 | |||
| 41ec13ee17 | |||
| efa9aa9097 | |||
| d9afb48f45 | |||
| d1140e0f16 | |||
| 6091e44561 | |||
| e233ba790f | |||
| f0304b4c00 | |||
| 60594ca58e | |||
| c7f2df4abc | |||
| 5b7409f802 | |||
| 06038062a2 | |||
| ae9fe89759 | |||
| 04def60021 | |||
| 9ce0f69dff | |||
| 90c3be91c4 | |||
| ebccfb3531 | |||
| e006f1d02e | |||
| 5292319802 | |||
| 173121ca87 | |||
| 26bab031bd | |||
| b5fefffa09 | |||
| dccb3e370a | |||
| 95ca55aa7e | |||
| e01813f29d | |||
| 7f41e117a2 | |||
| dd5fc806e5 | |||
| f8ca8d31e6 | |||
| ed89d803f0 | |||
| 3d24092cd2 | |||
| 304bb43d85 | |||
| 59a79a30a5 | |||
| c0d450d39e | |||
| 6f14d609b2 | |||
| 77ef87456f | |||
| 32d6af935c | |||
| 6af3a6fc41 | |||
| f8a06fb3b7 | |||
| e790bb9e8a | |||
| 89be6f5931 | |||
| 4cdef3285d | |||
| bcd82c4d59 | |||
| caf63ab01f | |||
| 2d72891162 | |||
| cda2ac3e77 | |||
| 57d3d60f6a | |||
| d6b5befe76 | |||
| 7429c1788d | |||
| 0ef71c193b | |||
| 4a50dd015e | |||
| 22274550ab | |||
| 4f06d6ae68 | |||
| c54d8041c3 | |||
| 52ae397d7d | |||
| d05b6f6348 | |||
| 049375d30e | |||
| 88eb0123f5 | |||
| a547ddbbba | |||
| 28e72fd53e | |||
| f63107b334 | |||
| 955d7aa871 | |||
| b8d2cf1b78 | |||
| 7c465730de | |||
| aca130272d | |||
| 76e5e9883f | |||
| fb4f4519ff | |||
| 30328714a7 | |||
| 759466de4a | |||
| d3efb829b5 | |||
| 90b96799ac | |||
| 56679ad770 | |||
| b5541722c2 | |||
| aaea105861 | |||
| e6a081d639 | |||
| 1835d03412 | |||
| c7a712d8d8 | |||
| 20d315b4f7 | |||
| a44a5e3135 | |||
| 039b2a0557 | |||
| cd1dfe7dcd | |||
| fb35517dfa | |||
| b9f0b35bbe | |||
| b1b09f8cd0 | |||
| d467df7980 | |||
| 09bc7eb48c | |||
| b77f07a4cd | |||
| 59f0076306 | |||
| 83bc5bc775 | |||
| 6b1dfec828 | |||
| d827f41fa5 | |||
| 7eca587fdf | |||
| a351bc1b10 | |||
| 1d61d61a81 | |||
| 755255030b | |||
| 8e79998e95 | |||
| 9f914598c2 | |||
| 0468b04538 | |||
| 232a3876c2 | |||
| 1c27dd72d6 | |||
| de5e474001 | |||
| 0fc092779d | |||
| 7abfdfbf3e | |||
| 92724d10ba | |||
| 1d7acbf916 | |||
| ea3ea487bc | |||
| 69ad8c4842 | |||
| 0047e0360a | |||
| 1ac6abdad0 | |||
| 65f09f793c | |||
| 84e21ff467 | |||
| 11b228f77a | |||
| 7860a6aefb | |||
| 454e561d69 | |||
| fe91ff59a3 | |||
| 64c24a00d6 | |||
| 58c0ae72f6 | |||
| c5f079f61f | |||
| 16f23f4a91 | |||
| b0f12d55dd | |||
| 8a648c1b97 | |||
| 2d45466f66 | |||
| c6a0e6e96f | |||
| 79755dc624 | |||
| f260f2c83d | |||
| 14402005bf | |||
| 0d372fb878 | |||
| 5eac924118 | |||
| 0b75da7a8f | |||
| 2dfc94b566 | |||
| e318888a06 | |||
| 7c95934cc2 | |||
| 96df9d8323 | |||
| 31e2c7c8b4 | |||
| 2a725ee13f | |||
| eb8037ee3c | |||
| fadb84d703 | |||
| 9d0d2415be | |||
| 130dae0734 | |||
| 402234dfc3 | |||
| 4406d51a98 | |||
| 9b6de6ecb7 | |||
| c512631184 | |||
| 19d27c31bc | |||
| 28310fe886 | |||
| 1ccc2bce7e | |||
| 00237e30b2 | |||
| 521ff88f7c | |||
| 64897a1dd1 | |||
| 41ce8480bb | |||
| 204a6ecb15 | |||
| 5db3b186a4 | |||
| fe640cf421 | |||
| ca67c79fe4 | |||
| 8917bcd4bf | |||
| a75ec98673 | |||
| e476338d61 | |||
| d752b882f2 | |||
| ade27773e6 | |||
| 0c1a1d2388 | |||
| e3f335dfe5 | |||
| 7a191c0ebb | |||
| 55bf11e393 | |||
| e90dcd10f3 | |||
| 7d44f4eaa0 | |||
| f541432d44 | |||
| aa4e32f78a | |||
| 2bc1416c00 | |||
| 68bbbf757d | |||
| 3df96fdfdd | |||
| 740ab76eaf | |||
| ce13a47fea | |||
| 50a4a60786 | |||
| 14afa02db6 | |||
| cc990d6ce4 | |||
| 0f5162e0c0 | |||
| b26cd52f8c | |||
| 236d925a1d | |||
| a4a4bc61ba | |||
| 3d82888580 | |||
| 749b878904 | |||
| f396e3934c | |||
| dd7594b5fa | |||
| 4ac1beee6d | |||
| 8c0ce736d8 | |||
| 8858c9426f | |||
| 9123ccb583 | |||
| 5dce851bef | |||
| 5b0441de6b | |||
| 317c53ab6f | |||
| 7db4411462 | |||
| 422317850e | |||
| 965537d8cb | |||
| 0dcdec5949 | |||
| 7fce305a1d | |||
| dfbbe23512 | |||
| 7761a91242 | |||
| 9b06e46bf1 | |||
| 171e6f1871 | |||
| 1431e41d16 | |||
| a49c655816 | |||
| 30e3ffc0e9 | |||
| db7211c3a9 | |||
| c6d57cf5c3 | |||
| ca5253ddee | |||
| e19da84b2e | |||
| 3e8e3542fd | |||
| 2f7365412d | |||
| bba9ad131e | |||
| ee24380d1f | |||
| bfcd91acf8 | |||
| d97291d4d3 | |||
| a61fbbcf45 | |||
| 5adc8fd0a7 | |||
| 251a486c28 | |||
| 1edb96a0bd | |||
| 52e674a192 | |||
| e241c13bf1 | |||
| b53ff31212 | |||
| 2045e89f24 | |||
| 841b2dab5c | |||
| d634f7d3cf | |||
| 1593e181a3 | |||
| 8e781506de | |||
| f6a2c7620a | |||
| 6c30e5ab2f | |||
| 810eac4e61 | |||
| d9bb3ab7ab | |||
| 21d0e90bd9 | |||
| f071d749ce | |||
| d95c354595 | |||
| 9bcd8183f6 | |||
| 5c531d8273 | |||
| 3fe3e2ca95 | |||
| f01e781469 | |||
| e6180a5a81 | |||
| a9d31b46ed | |||
| b01f23ed99 | |||
| 842bf21411 | |||
| 750d9e3c38 | |||
| 3b90318f83 | |||
| 1fb37604d3 | |||
| 018ecab310 | |||
| 6a1d6017e2 | |||
| e7406f8147 | |||
| f316026592 | |||
| a8db747768 | |||
| 727af94722 | |||
| 6c89659df7 | |||
| 58c7c8f6ff | |||
| 225003f412 | |||
| 40b966aef2 | |||
| 294ec6988b | |||
| fad23ebb22 | |||
| b226dc4337 | |||
| e7af863017 | |||
| af6ce5b79c | |||
| 0a6fc943e2 | |||
| 67c6e178e1 | |||
| 9bf213707c | |||
| 5cd7899e98 | |||
| ceec5fb9b3 | |||
| 85be497edb | |||
| bd1b7107af | |||
| b8275f108d | |||
| 2820ff9758 | |||
| a015de893d | |||
| b7f746e66d | |||
| 40cf4941f0 | |||
| 8a418ceb3e | |||
| 0312b6eb07 | |||
| 077f197d24 | |||
| 62354b4dce | |||
| 5970cd85cf | |||
| 4f6a742e6c | |||
| 87b43e5d04 | |||
| f0c2a64ae3 | |||
| e5d3deb6fe | |||
| d321058b48 | |||
| cea83140c0 | |||
| 7807d6caf0 | |||
| 3d0fad3c2a | |||
| fe3b087435 | |||
| bcefe52298 | |||
| f88c148ccc | |||
| 4a47b1d04a | |||
| 80da937cb7 | |||
| c81972e66e | |||
| b09362fde1 | |||
| b7688844fa | |||
| 3d212e7592 | |||
| 7aedfdf9c3 | |||
| ebd4f74959 | |||
| ebe2fbab92 | |||
| 4831a0cae1 | |||
| 07192c6ffb | |||
| 09d325e8ab | |||
| 11b0293d5f | |||
| 493bbed82c | |||
| 3bc25daaab | |||
| 40a3bc4bc3 | |||
| c083d63c96 | |||
| 0221e3ea21 | |||
| f5fd2b064e | |||
| 06e91a6a98 | |||
| 41b3f3c018 | |||
| 842076fef4 | |||
| 10d58f6a47 | |||
| e2464efc12 | |||
| 1beae5024e | |||
| c7c65b6542 | |||
| f41ff0a6e8 | |||
| 389c1d189f | |||
| 487298986e | |||
| d4cd045c48 | |||
| 3816645313 | |||
| 83b937c720 | |||
| b3e6e8976f | |||
| c890ea93cb | |||
| 84c68c6913 | |||
| 6f1e6fe232 | |||
| b0d76312d1 | |||
| 19c846c71c | |||
| f355c73209 | |||
| 173014ba19 | |||
| 24d4752bc3 | |||
| a85b273e2e | |||
| 7f73b4b304 | |||
| f3eb6d1b79 | |||
| 9082cc9f44 | |||
| bf64a40472 | |||
| 32dbeebc09 | |||
| 66ddc79b15 | |||
| 7a8bd0bf41 | |||
| ee552de740 | |||
| 6d1fb61573 | |||
| f094529e66 | |||
| 5fecec2a4c | |||
| 85bb46f45c | |||
| 17c329fd6d | |||
| 437e1b425a | |||
| c0f87db3c1 | |||
| a9c5c19e30 | |||
| f39567b2bc | |||
| 99ec0f4946 | |||
| f5c60f68dc | |||
| bdef0986d6 | |||
| c396c4dac8 | |||
| 0b443f510b | |||
| 66fa6f3824 | |||
| 875fbe969f | |||
| e35e85886e | |||
| 7211217f40 | |||
| d34b980516 | |||
| 6aee52d980 | |||
| 090c8d5315 | |||
| 1042ff9f4b | |||
| bc687969c1 | |||
| de27bd3f41 | |||
| 4dcf713d0e | |||
| f9dea53c24 | |||
| 6cedd7c327 | |||
| 6278d32f7d | |||
| 991039903c | |||
| ea3e92be3f | |||
| 56c3306b38 | |||
| cb6561c151 | |||
| 407aedcd20 | |||
| bf8ff66c77 | |||
| 82f753efb1 | |||
| e950794866 | |||
| cb7c0cf83e | |||
| 33cb7031a3 | |||
| 33492fc851 | |||
| 5101522025 | |||
| 5130ab7e6a | |||
| 27024fb38e | |||
| ff4e934bef | |||
| a1d09fb5fc | |||
| aefaac2405 | |||
| e96cff4fc5 | |||
| 97bec3b1ee | |||
| 27f8dcf06d | |||
| cb97ad3d2c | |||
| 8a7b3b5f95 | |||
| 11cc124019 | |||
| e2daeb5e54 | |||
| cbc7c5125d | |||
| 27fd9d54f9 | |||
| e5e7ae9eda | |||
| 315bc57ac3 | |||
| 3b0b9175d6 | |||
| e570800396 | |||
| c327378373 | |||
| aaffd61e4e | |||
| b32edb1ed6 | |||
| 624980a47b | |||
| 3f436e6dfd | |||
| d647ea7eee | |||
| 9ff30f8c1d | |||
| e3be1a1d8a | |||
| f63e58f2d9 | |||
| a05a026bc7 | |||
| 1d875b1f1f | |||
| f4f2493b59 | |||
| 4501da6edc | |||
| 36045fe53b | |||
| 7eef39d5fd | |||
| de7f78a43b | |||
| fab3567d74 | |||
| 6d6514d5c3 | |||
| b67fe79e47 | |||
| 064b4bf715 | |||
| e08be4c2a8 | |||
| 0276810776 | |||
| 565d0e98a9 | |||
| 47245df9bd | |||
| c07670ac3e | |||
| 37b4376b4c | |||
| 3b52ecafa3 | |||
| ac32647fac | |||
| 4e143dfa18 | |||
| b84ffddd50 | |||
| 1531114677 | |||
| ab4c1f0925 | |||
| f7149e48e8 | |||
| 055cfa12b2 | |||
| 5cb03cd6ef | |||
| 70bcc6d2b3 | |||
| 43d3daa840 | |||
| 7dfa288270 | |||
| 881b9a7bd1 | |||
| e85acdd20c | |||
| d725ed1771 | |||
| d58a27e2bf | |||
| f3b0dfabea | |||
| cccaec326a | |||
| a3f444eb25 | |||
| 277b0c1d00 | |||
| 7bba4a80d9 | |||
| f29d38d9d9 | |||
| b4a0288a39 | |||
| e63ab23d20 | |||
| 776279a4e6 | |||
| 7a9012c3e9 | |||
| 8c619cfde5 | |||
| cc4e3bdb76 | |||
| 3dfd187710 | |||
| cf66eca42c | |||
| 1046d0c47b | |||
| d940aabf66 | |||
| e1bf680d13 | |||
| 18720db594 | |||
| 19c70bf887 | |||
| a672119dd0 | |||
| f721e41f2b | |||
| 853639f390 | |||
| ba11e4467f | |||
| 17073905f7 | |||
| 3b696e2a03 | |||
| dd5658f987 | |||
| 54847b0710 | |||
| b72e276eb8 | |||
| 0808ed5bd8 | |||
| ec25eac697 | |||
| 09340d543d | |||
| 913883be3a | |||
| 8cc1dff0b8 | |||
| 5b2d52b8bc | |||
| 22ef82b59c | |||
| a235c94c02 | |||
| 795d2ac7cf | |||
| 1c4efb92c6 | |||
| 56a1770f45 | |||
| b766aef257 | |||
| 3e5d0ef87d | |||
| a26468c059 | |||
| 0099a41fd8 | |||
| b1baacf272 | |||
| 8afaf8f329 | |||
| 610860c379 | |||
| c076af64af | |||
| d0435cbf91 | |||
| ace199ffa8 | |||
| 5b89fa7ccf | |||
| d8929467eb | |||
| ba66668fff | |||
| 1e1c7fa9c4 | |||
| 9b64a18daf | |||
| 398d382dac | |||
| 53e3c44186 | |||
| 4395f68662 | |||
| c5ec07a661 | |||
| 4ca726439b | |||
| 72deddcd9b | |||
| 6812a28131 | |||
| 7933775462 | |||
| 3b9d84d689 | |||
| b5e3b22558 | |||
| 93d7088af3 | |||
| 9f4d88cc8f | |||
| 8da7de817c | |||
| acdea5f00b | |||
| 6c9cf198d7 | |||
| 5fa3ff9610 | |||
| 35a801d997 | |||
| a3634c468f | |||
| dfaeddefa2 | 
							
								
								
									
										11
									
								
								.coveragerc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								.coveragerc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | |||||||
|  | # -*- conf -*- | ||||||
|  |  | ||||||
|  | [run] | ||||||
|  | branch = True | ||||||
|  |  | ||||||
|  | [report] | ||||||
|  | exclude_lines = | ||||||
|  | 	pragma: no cover | ||||||
|  | 	if 0: | ||||||
|  | omit = nilmdb/scripts,nilmdb/_version.py,nilmdb/fsck | ||||||
|  | show_missing = True | ||||||
							
								
								
									
										1
									
								
								.gitattributes
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								.gitattributes
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | |||||||
|  | nilmdb/_version.py export-subst | ||||||
							
								
								
									
										24
									
								
								.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,24 @@ | |||||||
|  | # Tests | ||||||
|  | tests/*testdb/ | ||||||
|  | .coverage | ||||||
|  | db/ | ||||||
|  |  | ||||||
|  | # Compiled / cythonized files | ||||||
|  | README.html | ||||||
|  | docs/*.html | ||||||
|  | build/ | ||||||
|  | *.pyc | ||||||
|  | nilmdb/server/interval.c | ||||||
|  | nilmdb/server/layout.c | ||||||
|  | nilmdb/server/rbtree.c | ||||||
|  | *.so | ||||||
|  |  | ||||||
|  | # Setup junk | ||||||
|  | dist/ | ||||||
|  | nilmdb.egg-info/ | ||||||
|  | venv/ | ||||||
|  | .eggs/ | ||||||
|  |  | ||||||
|  | # Misc | ||||||
|  | timeit*out | ||||||
|  |  | ||||||
							
								
								
									
										29
									
								
								MANIFEST.in
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								MANIFEST.in
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,29 @@ | |||||||
|  | # Root | ||||||
|  | include README.txt | ||||||
|  | include setup.cfg | ||||||
|  | include setup.py | ||||||
|  | include versioneer.py | ||||||
|  | include Makefile | ||||||
|  | include .coveragerc | ||||||
|  | include .pylintrc | ||||||
|  | include requirements.txt | ||||||
|  |  | ||||||
|  | # Cython files -- include .pyx source, but not the generated .c files | ||||||
|  | # (Downstream systems must have cython installed in order to build) | ||||||
|  | recursive-include nilmdb/server *.pyx *.pyxdep *.pxd | ||||||
|  | exclude nilmdb/server/interval.c | ||||||
|  | exclude nilmdb/server/rbtree.c | ||||||
|  |  | ||||||
|  | # Version | ||||||
|  | include nilmdb/_version.py | ||||||
|  |  | ||||||
|  | # Tests | ||||||
|  | recursive-include tests *.py | ||||||
|  | recursive-include tests/data * | ||||||
|  | include tests/test.order | ||||||
|  |  | ||||||
|  | # Docs | ||||||
|  | recursive-include docs Makefile *.md | ||||||
|  |  | ||||||
|  | # Extras | ||||||
|  | recursive-include extras * | ||||||
							
								
								
									
										52
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										52
									
								
								Makefile
									
									
									
									
									
								
							| @@ -1,2 +1,50 @@ | |||||||
| all: | # By default, run the tests. | ||||||
| 	nosetests nilmdb/test_interval.py | all: test | ||||||
|  |  | ||||||
|  | version: | ||||||
|  | 	python3 setup.py version | ||||||
|  |  | ||||||
|  | build: | ||||||
|  | 	python3 setup.py build_ext --inplace | ||||||
|  |  | ||||||
|  | dist: sdist | ||||||
|  | sdist: | ||||||
|  | 	python3 setup.py sdist | ||||||
|  |  | ||||||
|  | install: | ||||||
|  | 	python3 setup.py install | ||||||
|  |  | ||||||
|  | develop: | ||||||
|  | 	python3 setup.py develop | ||||||
|  |  | ||||||
|  | docs: | ||||||
|  | 	make -C docs | ||||||
|  |  | ||||||
|  | ctrl: flake | ||||||
|  | flake: | ||||||
|  | 	flake8 nilmdb | ||||||
|  | lint: | ||||||
|  | 	pylint3 --rcfile=setup.cfg nilmdb | ||||||
|  |  | ||||||
|  | test: | ||||||
|  | ifneq ($(INSIDE_EMACS),) | ||||||
|  | # Use the slightly more flexible script | ||||||
|  | 	python3 setup.py build_ext --inplace | ||||||
|  | 	python3 tests/runtests.py | ||||||
|  | else | ||||||
|  | # Let setup.py check dependencies, build stuff, and run the test | ||||||
|  | 	python3 setup.py nosetests | ||||||
|  | endif | ||||||
|  |  | ||||||
|  | clean:: | ||||||
|  | 	find . -name '*.pyc' -o -name '__pycache__' -print0 | xargs -0 rm -rf | ||||||
|  | 	rm -f .coverage | ||||||
|  | 	rm -rf tests/*testdb* | ||||||
|  | 	rm -rf nilmdb.egg-info/ build/ nilmdb/server/*.so | ||||||
|  | 	make -C docs clean | ||||||
|  |  | ||||||
|  | gitclean:: | ||||||
|  | 	git clean -dXf | ||||||
|  |  | ||||||
|  | .PHONY: all version build dist sdist install docs test | ||||||
|  | .PHONY: ctrl lint flake clean gitclean | ||||||
|   | |||||||
							
								
								
									
										40
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,40 @@ | |||||||
|  | # nilmdb: Non-Intrusive Load Monitor Database | ||||||
|  | by Jim Paris <jim@jtan.com> | ||||||
|  |  | ||||||
|  | NilmDB requires Python 3.8 or newer. | ||||||
|  |  | ||||||
|  | ## Prerequisites: | ||||||
|  |  | ||||||
|  |     # Runtime and build environments | ||||||
|  |     sudo apt install python3 python3-dev python3-venv python3-pip | ||||||
|  |  | ||||||
|  |     # Create a new Python virtual environment to isolate deps. | ||||||
|  |     python3 -m venv ../venv | ||||||
|  |     source ../venv/bin/activate   # run "deactivate" to leave | ||||||
|  |  | ||||||
|  |     # Install all Python dependencies | ||||||
|  |     pip3 install -r requirements.txt | ||||||
|  |  | ||||||
|  | ## Test: | ||||||
|  |  | ||||||
|  |     python3 setup.py nosetests | ||||||
|  |  | ||||||
|  | ## Install: | ||||||
|  |  | ||||||
|  | Install it into the virtual environment | ||||||
|  |  | ||||||
|  |     python3 setup.py install | ||||||
|  |  | ||||||
|  | If you want to instead install it system-wide, you will also need to | ||||||
|  | install the requirements system-wide: | ||||||
|  |  | ||||||
|  |     sudo pip3 install -r requirements.txt | ||||||
|  |     sudo python3 setup.py install | ||||||
|  |  | ||||||
|  | ## Usage: | ||||||
|  |  | ||||||
|  |     nilmdb-server --help | ||||||
|  |     nilmdb-fsck --help | ||||||
|  |     nilmtool --help | ||||||
|  |  | ||||||
|  | See docs/wsgi.md for info on setting up a WSGI application in Apache. | ||||||
| @@ -1,4 +0,0 @@ | |||||||
| To install,  |  | ||||||
|  |  | ||||||
|    python seutp.py install |  | ||||||
|  |  | ||||||
| @@ -1,26 +0,0 @@ | |||||||
| #!/usr/bin/python |  | ||||||
|  |  | ||||||
| from nilmdb import Interval |  | ||||||
| from optparse import OptionParser |  | ||||||
| import sys |  | ||||||
|  |  | ||||||
| version = "1.0" |  | ||||||
|  |  | ||||||
| parser = OptionParser() |  | ||||||
| parser.add_option("-d", "--db", dest="database", metavar="DATABASE", |  | ||||||
|                   help="location of sqlite database") |  | ||||||
| parser.add_option("-V", "--version", dest="version", default=False, action="store_true", |  | ||||||
|                   help="print version then exit") |  | ||||||
|  |  | ||||||
| (options, args) = parser.parse_args() |  | ||||||
|  |  | ||||||
| if (options.version): |  | ||||||
|     print "This script version: " + version |  | ||||||
|     sys.exit(0) |  | ||||||
|  |  | ||||||
| if options.database is None: |  | ||||||
|     print "Error: database is mandatory" |  | ||||||
|     sys.exit(1) |  | ||||||
|  |  | ||||||
| print "Database is " + options.database |  | ||||||
|  |  | ||||||
							
								
								
									
										9
									
								
								docs/Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								docs/Makefile
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | |||||||
|  | ALL_DOCS = $(wildcard *.md) | ||||||
|  |  | ||||||
|  | all: $(ALL_DOCS:.md=.html) | ||||||
|  |  | ||||||
|  | %.html: %.md | ||||||
|  | 	pandoc -s $< > $@ | ||||||
|  |  | ||||||
|  | clean: | ||||||
|  | 	rm -f *.html | ||||||
							
								
								
									
										5
									
								
								docs/TODO.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								docs/TODO.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,5 @@ | |||||||
|  | - Documentation | ||||||
|  |  | ||||||
|  | - Machine-readable information in OverflowError, parser errors. | ||||||
|  |   Maybe subclass `cherrypy.HTTPError` and override `set_response` | ||||||
|  |   to add another JSON field? | ||||||
							
								
								
									
										469
									
								
								docs/design.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										469
									
								
								docs/design.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,469 @@ | |||||||
|  | Structure | ||||||
|  | --------- | ||||||
|  | nilmdb.nilmdb is the NILM database interface.  A nilmdb.BulkData | ||||||
|  | interface stores data in flat files, and a SQL database tracks | ||||||
|  | metadata and ranges. | ||||||
|  |  | ||||||
|  | Access to the nilmdb must be single-threaded.  This is handled with | ||||||
|  | the nilmdb.serializer class.  In the future this could probably | ||||||
|  | be turned into a per-path serialization. | ||||||
|  |  | ||||||
|  | nilmdb.server is a HTTP server that provides an interface to talk, | ||||||
|  | through the serialization layer, to the nilmdb object. | ||||||
|  |  | ||||||
|  | nilmdb.client is a HTTP client that connects to this. | ||||||
|  |  | ||||||
|  | Sqlite performance | ||||||
|  | ------------------ | ||||||
|  |  | ||||||
|  | Committing a transaction in the default sync mode (PRAGMA synchronous=FULL) | ||||||
|  | takes about 125msec.  sqlite3 will commit transactions at 3 times: | ||||||
|  |  | ||||||
|  | 1. explicit con.commit() | ||||||
|  |  | ||||||
|  | 2. between a series of DML commands and non-DML commands, e.g. | ||||||
|  |    after a series of INSERT, SELECT, but before a CREATE TABLE or | ||||||
|  |    PRAGMA. | ||||||
|  |  | ||||||
|  | 3. at the end of an explicit transaction, e.g. "with self.con as con:" | ||||||
|  |  | ||||||
|  | To speed up testing, or if this transaction speed becomes an issue, | ||||||
|  | the sync=False option to NilmDB will set PRAGMA synchronous=OFF. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | Inserting streams | ||||||
|  | ----------------- | ||||||
|  |  | ||||||
|  | We need to send the contents of "data" as POST.  Do we need chunked | ||||||
|  | transfer? | ||||||
|  |  | ||||||
|  | - Don't know the size in advance, so we would need to use chunked if | ||||||
|  |   we send the entire thing in one request. | ||||||
|  | - But we shouldn't send one chunk per line, so we need to buffer some | ||||||
|  |   anyway; why not just make new requests? | ||||||
|  | - Consider the infinite-streaming case, we might want to send it | ||||||
|  |   immediately?  Not really -- server still should do explicit inserts | ||||||
|  |   of fixed-size chunks. | ||||||
|  | - Even chunked encoding needs the size of each chunk beforehand, so | ||||||
|  |   everything still gets buffered.  Just a tradeoff of buffer size. | ||||||
|  |  | ||||||
|  | Before timestamps are added: | ||||||
|  |  | ||||||
|  | - Raw data is about 440 kB/s    (9 channels) | ||||||
|  | - Prep data is about 12.5 kB/s  (1 phase) | ||||||
|  | - How do we know how much data to send? | ||||||
|  |  | ||||||
|  |     - Remember that we can only do maybe 8-50 transactions per second on | ||||||
|  |       the sqlite database.  So if one block of inserted data is one | ||||||
|  |       transaction, we'd need the raw case to be around 64kB per request, | ||||||
|  |       ideally more. | ||||||
|  |     - Maybe use a range, based on how long it's taking to read the data | ||||||
|  |         - If no more data, send it | ||||||
|  |         - If data > 1 MB, send it | ||||||
|  |         - If more than 10 seconds have elapsed, send it | ||||||
|  |     - Should those numbers come from the server? | ||||||
|  |  | ||||||
|  | Converting from ASCII to PyTables: | ||||||
|  |  | ||||||
|  | - For each row getting added, we need to set attributes on a PyTables | ||||||
|  |   Row object and call table.append().  This means that there isn't a | ||||||
|  |   particularly efficient way of converting from ascii. | ||||||
|  | - Could create a function like nilmdb.layout.Layout("foo").fillRow(asciiline) | ||||||
|  |     - But this means we're doing parsing on the serialized side | ||||||
|  |     - Let's keep parsing on the threaded server side so we can detect | ||||||
|  |       errors better, and not block the serialized nilmdb for a slow | ||||||
|  |       parsing process. | ||||||
|  | - Client sends ASCII data | ||||||
|  | - Server converts this ASCII data to a list of values | ||||||
|  |     - Maybe: | ||||||
|  |  | ||||||
|  |             # threaded side creates this object | ||||||
|  |             parser = nilmdb.layout.Parser("layout_name") | ||||||
|  |             # threaded side parses and fills it with data | ||||||
|  |             parser.parse(textdata) | ||||||
|  |             # serialized side pulls out rows | ||||||
|  |             for n in xrange(parser.nrows): | ||||||
|  |                 parser.fill_row(rowinstance, n) | ||||||
|  |                 table.append() | ||||||
|  |  | ||||||
|  |  | ||||||
|  | Inserting streams, inside nilmdb | ||||||
|  | -------------------------------- | ||||||
|  |  | ||||||
|  | - First check that the new stream doesn't overlap. | ||||||
|  |     - Get minimum timestamp, maximum timestamp from data parser. | ||||||
|  |         - (extend parser to verify monotonicity and track extents) | ||||||
|  |     - Get all intervals for this stream in the database | ||||||
|  |     - See if new interval overlaps any existing ones | ||||||
|  |         - If so, bail | ||||||
|  |     - Question: should we cache intervals inside NilmDB? | ||||||
|  |         - Assume database is fast for now, and always rebuild from DB. | ||||||
|  |         - Can add a caching layer later if we need to. | ||||||
|  |     - `stream_get_ranges(path)` -> return IntervalSet? | ||||||
|  |  | ||||||
|  | Speed | ||||||
|  | ----- | ||||||
|  |  | ||||||
|  | - First approach was quadratic.  Adding four hours of data: | ||||||
|  |  | ||||||
|  |         $ time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-110000 /bpnilm/1/raw | ||||||
|  |         real    24m31.093s | ||||||
|  |         $ time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-120001 /bpnilm/1/raw | ||||||
|  |         real    43m44.528s | ||||||
|  |         $ time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-130002 /bpnilm/1/raw | ||||||
|  |         real    93m29.713s | ||||||
|  |         $ time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-140003 /bpnilm/1/raw | ||||||
|  |         real    166m53.007s | ||||||
|  |  | ||||||
|  | - Disabling pytables indexing didn't help: | ||||||
|  |  | ||||||
|  |         real    31m21.492s | ||||||
|  |         real    52m51.963s | ||||||
|  |         real    102m8.151s | ||||||
|  |         real    176m12.469s | ||||||
|  |  | ||||||
|  | - Server RAM usage is constant. | ||||||
|  |  | ||||||
|  | - Speed problems were due to IntervalSet speed, of parsing intervals | ||||||
|  |   from the database and adding the new one each time. | ||||||
|  |  | ||||||
|  |     - First optimization is to cache result of `nilmdb:_get_intervals`, | ||||||
|  |       which gives the best speedup. | ||||||
|  |  | ||||||
|  |     - Also switched to internally using bxInterval from bx-python package. | ||||||
|  |       Speed of `tests/test_interval:TestIntervalSpeed` is pretty decent | ||||||
|  |       and seems to be growing logarithmically now.  About 85μs per insertion | ||||||
|  |       for inserting 131k entries. | ||||||
|  |  | ||||||
|  |     - Storing the interval data in SQL might be better, with a scheme like: | ||||||
|  |       http://www.logarithmic.net/pfh/blog/01235197474 | ||||||
|  |  | ||||||
|  | - Next slowdown target is nilmdb.layout.Parser.parse(). | ||||||
|  |     - Rewrote parsers using cython and sscanf | ||||||
|  |     - Stats (rev 10831), with `_add_interval` disabled | ||||||
|  |  | ||||||
|  |         layout.pyx.Parser.parse:128        6303 sec, 262k calls | ||||||
|  |          layout.pyx.parse:63               13913 sec, 5.1g calls | ||||||
|  |         numpy:records.py.fromrecords:569   7410 sec, 262k calls | ||||||
|  |  | ||||||
|  | - Probably OK for now. | ||||||
|  |  | ||||||
|  | - After all updates, now takes about 8.5 minutes to insert an hour of | ||||||
|  |   data, constant after adding 171 hours (4.9 billion data points) | ||||||
|  |  | ||||||
|  | - Data set size: 98 gigs = 20 bytes per data point. | ||||||
|  |   6 uint16 data + 1 uint32 timestamp = 16 bytes per point | ||||||
|  |   So compression must be off -- will retry with compression forced on. | ||||||
|  |  | ||||||
|  | IntervalSet speed | ||||||
|  | ----------------- | ||||||
|  | - Initial implementation was pretty slow, even with binary search in | ||||||
|  |   sorted list | ||||||
|  |  | ||||||
|  | - Replaced with bxInterval; now takes about log n time for an insertion | ||||||
|  |     - TestIntervalSpeed with range(17,18) and profiling | ||||||
|  |         - 85 μs each | ||||||
|  |         - 131072 calls to `__iadd__` | ||||||
|  |         - 131072 to bx.insert_interval | ||||||
|  |         - 131072 to bx.insert:395 | ||||||
|  |         - 2355835 to bx.insert:106  (18x as many?) | ||||||
|  |  | ||||||
|  | - Tried blist too, worse than bxinterval. | ||||||
|  |  | ||||||
|  | - Might be algorithmic improvements to be made in Interval.py, | ||||||
|  |   like in `__and__` | ||||||
|  |  | ||||||
|  | - Replaced again with rbtree.  Seems decent.  Numbers are time per | ||||||
|  |   insert for 2**17 insertions, followed by total wall time and RAM | ||||||
|  |   usage for running "make test" with `test_rbtree` and `test_interval` | ||||||
|  |   with range(5,20): | ||||||
|  |     - old values with bxinterval: | ||||||
|  |       20.2 μs, total 20 s, 177 MB RAM | ||||||
|  |     - rbtree, plain python: | ||||||
|  |       97 μs, total 105 s, 846 MB RAM | ||||||
|  |     - rbtree converted to cython: | ||||||
|  |       26 μs, total 29 s, 320 MB RAM | ||||||
|  |     - rbtree and interval converted to cython: | ||||||
|  |       8.4 μs, total 12 s, 134 MB RAM | ||||||
|  |  | ||||||
|  | - Would like to move Interval itself back to Python so other | ||||||
|  |   non-cythonized code like client code can use it more easily. | ||||||
|  |   Testing speed with just `test_interval` being tested, with | ||||||
|  |   `range(5,22)`, using `/usr/bin/time -v python tests/runtests.py`, | ||||||
|  |   times recorded for 2097152: | ||||||
|  |     - 52ae397 (Interval in cython): | ||||||
|  | 	  12.6133 μs each, ratio 0.866533, total 47 sec, 399 MB RAM | ||||||
|  | 	- 9759dcf (Interval in python): | ||||||
|  | 	  21.2937 μs each, ratio 1.462870, total 83 sec, 1107 MB RAM | ||||||
|  |   That's a huge difference!  Instead, will keep Interval and DBInterval | ||||||
|  |   cythonized inside nilmdb, and just have an additional copy in | ||||||
|  |   nilmdb.utils for clients to use. | ||||||
|  |  | ||||||
|  | Layouts | ||||||
|  | ------- | ||||||
|  | Current/old design has specific layouts: RawData, PrepData, RawNotchedData. | ||||||
|  | Let's get rid of this entirely and switch to simpler data types that are | ||||||
|  | just collections and counts of a single type.  We'll still use strings | ||||||
|  | to describe them, with format: | ||||||
|  |  | ||||||
|  |     type_count | ||||||
|  |  | ||||||
|  | where type is "uint16", "float32", or "float64", and count is an integer. | ||||||
|  |  | ||||||
|  | nilmdb.layout.named() will parse these strings into the appropriate | ||||||
|  | handlers.  For compatibility: | ||||||
|  |  | ||||||
|  |     "RawData" == "uint16_6" | ||||||
|  |     "RawNotchedData" == "uint16_9" | ||||||
|  |     "PrepData" == "float32_8" | ||||||
|  |  | ||||||
|  |  | ||||||
|  | BulkData design | ||||||
|  | --------------- | ||||||
|  |  | ||||||
|  | BulkData is a custom bulk data storage system that was written to | ||||||
|  | replace PyTables.  The general structure is a `data` subdirectory in | ||||||
|  | the main NilmDB directory.  Within `data`, paths are created for each | ||||||
|  | created stream.  These locations are called tables.  For example, | ||||||
|  | tables might be located at | ||||||
|  |  | ||||||
|  |     nilmdb/data/newton/raw/ | ||||||
|  |     nilmdb/data/newton/prep/ | ||||||
|  |     nilmdb/data/cottage/raw/ | ||||||
|  |  | ||||||
|  | Each table contains: | ||||||
|  |  | ||||||
|  | - An unchanging `_format` file (Python pickle format) that describes | ||||||
|  |   parameters of how the data is broken up, like files per directory, | ||||||
|  |   rows per file, and the binary data format | ||||||
|  |  | ||||||
|  | - Hex named subdirectories (`"%04x"`, although more than 65536 can exist) | ||||||
|  |  | ||||||
|  | - Hex named files within those subdirectories, like: | ||||||
|  |  | ||||||
|  |         /nilmdb/data/newton/raw/000b/010a | ||||||
|  |  | ||||||
|  |     The data format of these files is raw binary, interpreted by the | ||||||
|  |     Python `struct` module according to the format string in the | ||||||
|  |     `_format` file. | ||||||
|  |  | ||||||
|  | - Same as above, with `.removed` suffix, is an optional file (Python | ||||||
|  |   pickle format) containing a list of row numbers that have been | ||||||
|  |   logically removed from the file.  If this range covers the entire | ||||||
|  |   file, the entire file will be removed. | ||||||
|  |  | ||||||
|  | - Note that the `bulkdata.nrows` variable is calculated once in | ||||||
|  |   `BulkData.__init__()`, and only ever incremented during use.  Thus, | ||||||
|  |   even if all data is removed, `nrows` can remain high.  However, if | ||||||
|  |   the server is restarted, the newly calculated `nrows` may be lower | ||||||
|  |   than in a previous run due to deleted data.  To be specific, this | ||||||
|  |   sequence of events: | ||||||
|  |  | ||||||
|  |     - insert data | ||||||
|  |     - remove all data | ||||||
|  |     - insert data | ||||||
|  |  | ||||||
|  |     will result in having different row numbers in the database, and | ||||||
|  |     differently numbered files on the filesystem, than the sequence: | ||||||
|  |  | ||||||
|  |     - insert data | ||||||
|  |     - remove all data | ||||||
|  |     - restart server | ||||||
|  |     - insert data | ||||||
|  |  | ||||||
|  |     This is okay!  Everything should remain consistent both in the | ||||||
|  |     `BulkData` and `NilmDB`.  Not attempting to readjust `nrows` during | ||||||
|  |     deletion makes the code quite a bit simpler. | ||||||
|  |  | ||||||
|  | - Similarly, data files are never truncated shorter.  Removing data | ||||||
|  |   from the end of the file will not shorten it; it will only be | ||||||
|  |   deleted when it has been fully filled and all of the data has been | ||||||
|  |   subsequently removed. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | Rocket | ||||||
|  | ------ | ||||||
|  |  | ||||||
|  | Original design had the nilmdb.nilmdb thread (through bulkdata) | ||||||
|  | convert from on-disk layout to a Python list, and then the | ||||||
|  | nilmdb.server thread (from cherrypy) converts to ASCII.  For at least | ||||||
|  | the extraction side of things, it's easy to pass the bulkdata a layout | ||||||
|  | name instead, and have it convert directly from on-disk to ASCII | ||||||
|  | format, because this conversion can then be shoved into a C module. | ||||||
|  | This module, which provides a means for converting directly from | ||||||
|  | on-disk format to ASCII or Python lists, is the "rocket" interface. | ||||||
|  | Python is still used to manage the files and figure out where the | ||||||
|  | data should go; rocket just puts binary data directly in or out of | ||||||
|  | those files at specified locations. | ||||||
|  |  | ||||||
|  | Before rocket, testing speed with uint16_6 data, with an end-to-end | ||||||
|  | test (extracting data with nilmtool): | ||||||
|  |  | ||||||
|  | - insert: 65 klines/sec | ||||||
|  | - extract: 120 klines/sec | ||||||
|  |  | ||||||
|  | After switching to the rocket design, but using the Python version | ||||||
|  | (pyrocket): | ||||||
|  |  | ||||||
|  | - insert: 57 klines/sec | ||||||
|  | - extract: 120 klines/sec | ||||||
|  |  | ||||||
|  | After switching to a C extension module (rocket.c) | ||||||
|  |  | ||||||
|  | - insert: 74 klines/sec through insert.py; 99.6 klines/sec through nilmtool | ||||||
|  | - extract: 335 klines/sec | ||||||
|  |  | ||||||
|  | After client block updates (described below): | ||||||
|  |  | ||||||
|  | - insert: 180 klines/sec through nilmtool (pre-timestamped) | ||||||
|  | - extract: 390 klines/sec through nilmtool | ||||||
|  |  | ||||||
|  | Using "insert --timestamp" or "extract --bare" cuts the speed in half. | ||||||
|  |  | ||||||
|  | Blocks versus lines | ||||||
|  | ------------------- | ||||||
|  |  | ||||||
|  | Generally want to avoid parsing the bulk of the data as lines if | ||||||
|  | possible, and transfer things in bigger blocks at once. | ||||||
|  |  | ||||||
|  | Current places where we use lines: | ||||||
|  |  | ||||||
|  | - All data returned by `client.stream_extract`, since it comes from | ||||||
|  |   `httpclient.get_gen`, which iterates over lines.  Not sure if this | ||||||
|  |   should be changed, because a `nilmtool extract` is just about the | ||||||
|  |   same speed as `curl -q .../stream/extract`! | ||||||
|  |  | ||||||
|  | - `client.StreamInserter.insert_iter` and | ||||||
|  |   `client.StreamInserter.insert_line`, which should probably get | ||||||
|  |   replaced with block versions.  There's no real need to keep | ||||||
|  |   updating the timestamp every time we get a new line of data. | ||||||
|  |  | ||||||
|  |   - Finished.  Just a single insert() that takes any length string and | ||||||
|  |     does very little processing until it's time to send it to the | ||||||
|  | 	server. | ||||||
|  |  | ||||||
|  | Timestamps | ||||||
|  | ---------- | ||||||
|  |  | ||||||
|  | Timestamps are currently double-precision floats (64 bit).  Since the | ||||||
|  | mantissa is 53-bit, this can only represent about 15-17 significant | ||||||
|  | figures, and microsecond Unix timestamps like 1222333444.000111 are | ||||||
|  | already 16 significant figures.  Rounding is therefore an issue; | ||||||
|  | it's hard to be sure that converting from ASCII, then back to ASCII, | ||||||
|  | will always give the same result. | ||||||
|  |  | ||||||
|  | Also, if the client provides a floating point value like 1.9999999999, | ||||||
|  | we need to be careful that we don't store it as 1.9999999999 but later | ||||||
|  | print it as 2.000000, because then round-trips change the data. | ||||||
|  |  | ||||||
|  | Possible solutions: | ||||||
|  |  | ||||||
|  | - When the client provides a floating point value to the server, | ||||||
|  |   always round to the 6th decimal digit before verifying & storing. | ||||||
|  |   Good for compatibility and simplicity.  But still might have rounding | ||||||
|  |   issues, and clients will also need to round when doing their own | ||||||
|  |   verification.  Having every piece of code need to know which digit | ||||||
|  |   to round at is not ideal. | ||||||
|  |  | ||||||
|  | - Always store int64 timestamps on the server, representing | ||||||
|  |   microseconds since epoch.  int64 timestamps are used in all HTTP | ||||||
|  |   parameters, in insert/extract ASCII strings, client API, commandline | ||||||
|  |   raw timestamps, etc.  Pretty big change. | ||||||
|  |  | ||||||
|  |   This is what we'll go with... | ||||||
|  |  | ||||||
|  |   - Client programs that interpret the timestamps as doubles instead | ||||||
|  |     of ints will remain accurate until 2^53 microseconds, or year | ||||||
|  |     2255. | ||||||
|  |  | ||||||
|  |   - On insert, maybe it's OK to send floating point microsecond values | ||||||
|  |     (1234567890123456.0), just to cope with clients that want to print | ||||||
|  |     everything as a double.  Server could try parsing as int64, and if | ||||||
|  |     that fails, parse as double and truncate to int64.  However, this | ||||||
|  |     wouldn't catch imprecise inputs like "1.23456789012e+15".  But | ||||||
|  |     maybe that can just be ignored; it's likely to cause a | ||||||
|  |     non-monotonic error at the client. | ||||||
|  |  | ||||||
|  |   - Timestamps like 1234567890.123456 never show up anywhere, except | ||||||
|  |     for interfacing to datetime_tz etc.  Command line "raw timestamps" | ||||||
|  |     are always printed as int64 values, and a new format | ||||||
|  |     "@1234567890123456" is added to the parser for specifying them | ||||||
|  |     exactly. | ||||||
|  |  | ||||||
|  | Binary interface | ||||||
|  | ---------------- | ||||||
|  |  | ||||||
|  | The ASCII interface is too slow for high-bandwidth processing, like | ||||||
|  | sinefits, prep, etc.  A binary interface was added so that you can | ||||||
|  | extract the raw binary out of the bulkdata storage.  This binary is | ||||||
|  | a little-endian format, e.g. in C a uint16_6 stream would be: | ||||||
|  |  | ||||||
|  |     #include <endian.h> | ||||||
|  |     #include <stdint.h> | ||||||
|  |     struct { | ||||||
|  |         int64_t timestamp_le; | ||||||
|  |         uint16_t data_le[6]; | ||||||
|  |     } __attribute__((packed)); | ||||||
|  |  | ||||||
|  | Remember to byteswap (with e.g. `le64toh` and `le16toh` from `<endian.h>` in C)! | ||||||
|  |  | ||||||
|  | This interface is used by the new `nilmdb.client.numpyclient.NumpyClient` | ||||||
|  | class, which is a subclass of the normal `nilmdb.client.client.Client` | ||||||
|  | and has all of the same functions.  It adds three new functions: | ||||||
|  |  | ||||||
|  | - `stream_extract_numpy` to extract data as a Numpy array | ||||||
|  |  | ||||||
|  | - `stream_insert_numpy` to insert data as a Numpy array | ||||||
|  |  | ||||||
|  | - `stream_insert_numpy_context` is the context manager for | ||||||
|  |   incrementally inserting data | ||||||
|  |  | ||||||
|  | It is significantly faster!  It is about 20 times faster to decimate a | ||||||
|  | stream with `nilm-decimate` when the filter code is using the new | ||||||
|  | binary/numpy interface. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | WSGI interface & chunked requests | ||||||
|  | --------------------------------- | ||||||
|  |  | ||||||
|  | mod_wsgi requires "WSGIChunkedRequest On" to handle | ||||||
|  | "Transfer-encoding: Chunked" requests.  However, `/stream/insert` | ||||||
|  | doesn't handle this correctly right now, because: | ||||||
|  |  | ||||||
|  | - The `cherrypy.request.body.read()` call needs to be fixed for chunked requests | ||||||
|  |  | ||||||
|  | - We don't want to just buffer endlessly in the server, and it will | ||||||
|  |   require some thought on how to handle data in chunks (what to do about | ||||||
|  |   interval endpoints). | ||||||
|  |  | ||||||
|  | It is probably better to just keep the endpoint management on the client | ||||||
|  | side, so leave "WSGIChunkedRequest off" for now. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | Unicode & character encoding | ||||||
|  | ---------------------------- | ||||||
|  |  | ||||||
|  | Stream data is passed back and forth as raw `bytes` objects in most | ||||||
|  | places, including the `nilmdb.client` and command-line interfaces. | ||||||
|  | This is done partially for performance reasons, and partially to | ||||||
|  | support the binary insert/extract options, where character-set encoding | ||||||
|  | would not apply. | ||||||
|  |  | ||||||
|  | For the HTTP server, the raw bytes transferred over HTTP are interpreted | ||||||
|  | as follows: | ||||||
|  | - For `/stream/insert`, the client-provided `Content-Type` is ignored, | ||||||
|  |   and the data is read as if it were `application/octet-stream`. | ||||||
|  | - For `/stream/extract`, the returned data is `application/octet-stream`. | ||||||
|  | - All other endpoints communicate via JSON, which is specified to always | ||||||
|  |   be encoded as UTF-8.  This includes: | ||||||
|  |     - `/version` | ||||||
|  |     - `/dbinfo` | ||||||
|  |     - `/stream/list` | ||||||
|  |     - `/stream/create` | ||||||
|  |     - `/stream/destroy` | ||||||
|  |     - `/stream/rename` | ||||||
|  |     - `/stream/get_metadata` | ||||||
|  |     - `/stream/set_metadata` | ||||||
|  |     - `/stream/update_metadata` | ||||||
|  |     - `/stream/remove` | ||||||
|  |     - `/stream/intervals` | ||||||
							
								
								
									
										32
									
								
								docs/wsgi.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								docs/wsgi.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,32 @@ | |||||||
|  | WSGI Application in Apache | ||||||
|  | -------------------------- | ||||||
|  |  | ||||||
|  | Install `apache2` and `libapache2-mod-wsgi` | ||||||
|  |  | ||||||
|  | We'll set up the database server at URL `http://myhost.com/nilmdb`. | ||||||
|  | The database will be stored in `/home/nilm/db`, and the process will | ||||||
|  | run as user `nilm`, group `nilm`. | ||||||
|  |  | ||||||
|  | First, create a WSGI script `/home/nilm/nilmdb.wsgi` containing: | ||||||
|  |  | ||||||
|  |     import nilmdb.server | ||||||
|  |     application = nilmdb.server.wsgi_application("/home/nilm/db", "/nilmdb") | ||||||
|  |  | ||||||
|  | The first parameter is the local filesystem path, and the second | ||||||
|  | parameter is the path part of the URL. | ||||||
|  |  | ||||||
|  | Then, set up Apache with a configuration like: | ||||||
|  |  | ||||||
|  |     <VirtualHost> | ||||||
|  |         WSGIScriptAlias /nilmdb /home/nilm/nilmdb.wsgi | ||||||
|  |         WSGIDaemonProcess nilmdb-procgroup threads=32 user=nilm group=nilm | ||||||
|  |         <Location /nilmdb> | ||||||
|  |             WSGIProcessGroup nilmdb-procgroup | ||||||
|  |             WSGIApplicationGroup nilmdb-appgroup | ||||||
|  |  | ||||||
|  |             # Access control example: | ||||||
|  |             Order deny,allow | ||||||
|  |             Deny from all | ||||||
|  |             Allow from 1.2.3.4 | ||||||
|  |         </Location> | ||||||
|  |     </VirtualHost> | ||||||
							
								
								
									
										50
									
								
								extras/fix-oversize-files.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								extras/fix-oversize-files.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | |||||||
|  | #!/usr/bin/env python3 | ||||||
|  |  | ||||||
|  | import os | ||||||
|  | import sys | ||||||
|  | import pickle | ||||||
|  | import argparse | ||||||
|  | import fcntl | ||||||
|  | import re | ||||||
|  | from nilmdb.client.numpyclient import layout_to_dtype | ||||||
|  |  | ||||||
|  | parser = argparse.ArgumentParser( | ||||||
|  |     description = """ | ||||||
|  | Fix database corruption where binary writes caused too much data to be | ||||||
|  | written to the file.  Truncates files to the correct length.  This was | ||||||
|  | fixed by b98ff1331a515ad47fd3203615e835b529b039f9. | ||||||
|  | """) | ||||||
|  | parser.add_argument("path", action="store", help='Database root path') | ||||||
|  | parser.add_argument("-y", "--yes", action="store_true", help='Fix them') | ||||||
|  | args = parser.parse_args() | ||||||
|  |  | ||||||
|  | lock = os.path.join(args.path, "data.lock") | ||||||
|  | with open(lock, "w") as f: | ||||||
|  |     fcntl.flock(f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) | ||||||
|  |  | ||||||
|  |     fix = {} | ||||||
|  |  | ||||||
|  |     for (path, dirs, files) in os.walk(args.path): | ||||||
|  |         if "_format" in files: | ||||||
|  |             with open(os.path.join(path, "_format")) as format: | ||||||
|  |                 fmt = pickle.load(format) | ||||||
|  |                 rowsize = layout_to_dtype(fmt["layout"]).itemsize | ||||||
|  |                 maxsize = rowsize * fmt["rows_per_file"] | ||||||
|  |                 fix[path] = maxsize | ||||||
|  |                 if maxsize < 128000000: # sanity check | ||||||
|  |                     raise Exception("bad maxsize " + str(maxsize)) | ||||||
|  |  | ||||||
|  |     for fixpath in fix: | ||||||
|  |         for (path, dirs, files) in os.walk(fixpath): | ||||||
|  |             for fn in files: | ||||||
|  |                 if not re.match("^[0-9a-f]{4,}$", fn): | ||||||
|  |                     continue | ||||||
|  |                 fn = os.path.join(path, fn) | ||||||
|  |                 size = os.path.getsize(fn) | ||||||
|  |                 maxsize = fix[fixpath] | ||||||
|  |                 if size > maxsize: | ||||||
|  |                     diff = size - maxsize | ||||||
|  |                     print(diff, "too big:", fn) | ||||||
|  |                     if args.yes: | ||||||
|  |                         with open(fn, "a+") as dbfile: | ||||||
|  |                             dbfile.truncate(maxsize) | ||||||
							
								
								
									
										20
									
								
								extras/nilmtool-bash-completion.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								extras/nilmtool-bash-completion.sh
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,20 @@ | |||||||
|  | # To enable bash completion: | ||||||
|  | # | ||||||
|  | # 1. Ensure python-argcomplete is installed: | ||||||
|  | #       pip install argcomplete | ||||||
|  | # 2. Source this file: | ||||||
|  | #       . nilmtool-bash-completion.sh | ||||||
|  |  | ||||||
|  | # Completion function registered for `nilmtool` via `complete -F`. | ||||||
|  | # Re-invokes the command being completed ("$1") with _ARGCOMPLETE=1 and the | ||||||
|  | # current completion state in the environment, and stores whatever it | ||||||
|  | # writes to file descriptor 8 in COMPREPLY. | ||||||
|  | _nilmtool_argcomplete() { | ||||||
|  |     # Split the captured output on vertical tab (octal 013) only. | ||||||
|  |     local IFS=$(printf "\013") | ||||||
|  |     # fd 8 -> captured output, fd 9 -> original stderr; the command's own | ||||||
|  |     # stdout and stderr are discarded. | ||||||
|  |     # (presumably argcomplete writes its completions to fd 8 -- confirm) | ||||||
|  |     COMPREPLY=( $(IFS="$IFS" \ | ||||||
|  |                   COMP_LINE="$COMP_LINE" \ | ||||||
|  | 	          COMP_WORDBREAKS="$COMP_WORDBREAKS" \ | ||||||
|  |                   COMP_POINT="$COMP_POINT" \ | ||||||
|  |                   _ARGCOMPLETE=1 \ | ||||||
|  |                   "$1" 8>&1 9>&2 1>/dev/null 2>/dev/null) ) | ||||||
|  |     # On a non-zero exit status, drop any partial result. | ||||||
|  |     if [[ $? != 0 ]]; then | ||||||
|  |         unset COMPREPLY | ||||||
|  |     fi | ||||||
|  | } | ||||||
|  | complete -o nospace -F _nilmtool_argcomplete nilmtool | ||||||
| @@ -1,2 +1,5 @@ | |||||||
| from nilmdb.interval import * | """Main NilmDB import""" | ||||||
| from nilmdb.fileinterval import * |  | ||||||
|  | from ._version import get_versions | ||||||
|  | __version__ = get_versions()['version'] | ||||||
|  | del get_versions | ||||||
|   | |||||||
							
								
								
									
										520
									
								
								nilmdb/_version.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										520
									
								
								nilmdb/_version.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,520 @@ | |||||||
|  |  | ||||||
|  | # This file helps to compute a version number in source trees obtained from | ||||||
|  | # git-archive tarball (such as those provided by githubs download-from-tag | ||||||
|  | # feature). Distribution tarballs (built by setup.py sdist) and build | ||||||
|  | # directories (produced by setup.py build) will contain a much shorter file | ||||||
|  | # that just contains the computed version number. | ||||||
|  |  | ||||||
|  | # This file is released into the public domain. Generated by | ||||||
|  | # versioneer-0.18 (https://github.com/warner/python-versioneer) | ||||||
|  |  | ||||||
|  | """Git implementation of _version.py.""" | ||||||
|  |  | ||||||
|  | import errno | ||||||
|  | import os | ||||||
|  | import re | ||||||
|  | import subprocess | ||||||
|  | import sys | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def get_keywords(): | ||||||
|  |     """Get the keywords needed to look up the version information. | ||||||
|  |  | ||||||
|  |     Returns a dict with the keys "refnames", "full" and "date". Unless | ||||||
|  |     git-archive has substituted them, the values are still the literal | ||||||
|  |     "$Format:...$" placeholders below. | ||||||
|  |     """ | ||||||
|  |     # these strings will be replaced by git during git-archive. | ||||||
|  |     # setup.py/versioneer.py will grep for the variable names, so they must | ||||||
|  |     # each be defined on a line of their own. _version.py will just call | ||||||
|  |     # get_keywords(). | ||||||
|  |     git_refnames = "$Format:%d$" | ||||||
|  |     git_full = "$Format:%H$" | ||||||
|  |     git_date = "$Format:%ci$" | ||||||
|  |     keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} | ||||||
|  |     return keywords | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class VersioneerConfig: | ||||||
|  |     """Container for Versioneer configuration parameters.""" | ||||||
|  |     # Deliberately empty: get_config() assigns the attributes (VCS, style, | ||||||
|  |     # tag_prefix, parentdir_prefix, versionfile_source, verbose) on an | ||||||
|  |     # instance after constructing it. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def get_config(): | ||||||
|  |     """Create, populate and return the VersioneerConfig() object. | ||||||
|  |  | ||||||
|  |     The returned object carries the hard-coded settings for this project: | ||||||
|  |     git as the VCS, "pep440" version style, and "nilmdb-" as both the tag | ||||||
|  |     prefix and the parent-directory prefix. | ||||||
|  |     """ | ||||||
|  |     # these strings are filled in when 'setup.py versioneer' creates | ||||||
|  |     # _version.py | ||||||
|  |     cfg = VersioneerConfig() | ||||||
|  |     cfg.VCS = "git" | ||||||
|  |     cfg.style = "pep440" | ||||||
|  |     cfg.tag_prefix = "nilmdb-" | ||||||
|  |     cfg.parentdir_prefix = "nilmdb-" | ||||||
|  |     cfg.versionfile_source = "nilmdb/_version.py" | ||||||
|  |     cfg.verbose = False | ||||||
|  |     return cfg | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class NotThisMethod(Exception): | ||||||
|  |     """Exception raised if a method is not valid for the current scenario.""" | ||||||
|  |     # Raised by the version-discovery functions in this module (e.g. | ||||||
|  |     # versions_from_parentdir) when their strategy does not apply. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | LONG_VERSION_PY = {} | ||||||
|  | HANDLERS = {} | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def register_vcs_handler(vcs, method):  # decorator | ||||||
|  |     """Decorator to mark a method as the handler for a particular VCS. | ||||||
|  |  | ||||||
|  |     `vcs` and `method` are the two keys under which the decorated function | ||||||
|  |     is stored in the module-level HANDLERS dict. The function itself is | ||||||
|  |     returned unchanged, so it stays callable under its own name. | ||||||
|  |     """ | ||||||
|  |     def decorate(f): | ||||||
|  |         """Store f in HANDLERS[vcs][method].""" | ||||||
|  |         # Create the per-VCS sub-dict the first time this VCS is seen. | ||||||
|  |         if vcs not in HANDLERS: | ||||||
|  |             HANDLERS[vcs] = {} | ||||||
|  |         HANDLERS[vcs][method] = f | ||||||
|  |         return f | ||||||
|  |     return decorate | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, | ||||||
|  |                 env=None): | ||||||
|  |     """Call the given command(s). | ||||||
|  |  | ||||||
|  |     Each entry of the list `commands` is tried in turn as the executable, | ||||||
|  |     with `args` appended, until one of them can be started. | ||||||
|  |  | ||||||
|  |     Returns a (stdout, returncode) tuple: | ||||||
|  |     - (None, None) if no executable could be started; | ||||||
|  |     - (None, returncode) if the process exited with a non-zero status; | ||||||
|  |     - (stripped stdout, 0) on success, decoded to str on Python 3. | ||||||
|  |     """ | ||||||
|  |     assert isinstance(commands, list) | ||||||
|  |     p = None | ||||||
|  |     for c in commands: | ||||||
|  |         try: | ||||||
|  |             dispcmd = str([c] + args) | ||||||
|  |             # remember shell=False, so use git.cmd on windows, not just git | ||||||
|  |             p = subprocess.Popen([c] + args, cwd=cwd, env=env, | ||||||
|  |                                  stdout=subprocess.PIPE, | ||||||
|  |                                  stderr=(subprocess.PIPE if hide_stderr | ||||||
|  |                                          else None)) | ||||||
|  |             break | ||||||
|  |         except EnvironmentError: | ||||||
|  |             e = sys.exc_info()[1] | ||||||
|  |             # Executable not found: try the next candidate name. | ||||||
|  |             if e.errno == errno.ENOENT: | ||||||
|  |                 continue | ||||||
|  |             # Any other OS error is final; give up without trying more. | ||||||
|  |             if verbose: | ||||||
|  |                 print("unable to run %s" % dispcmd) | ||||||
|  |                 print(e) | ||||||
|  |             return None, None | ||||||
|  |     else: | ||||||
|  |         # for/else: reached only when the loop finished without `break`, | ||||||
|  |         # i.e. `commands` was empty or every candidate raised ENOENT. | ||||||
|  |         if verbose: | ||||||
|  |             print("unable to find command, tried %s" % (commands,)) | ||||||
|  |         return None, None | ||||||
|  |     stdout = p.communicate()[0].strip() | ||||||
|  |     if sys.version_info[0] >= 3: | ||||||
|  |         stdout = stdout.decode() | ||||||
|  |     if p.returncode != 0: | ||||||
|  |         if verbose: | ||||||
|  |             print("unable to run %s (error)" % dispcmd) | ||||||
|  |             print("stdout was %s" % stdout) | ||||||
|  |         return None, p.returncode | ||||||
|  |     return stdout, p.returncode | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def versions_from_parentdir(parentdir_prefix, root, verbose): | ||||||
|  |     """Try to determine the version from the parent directory name. | ||||||
|  |  | ||||||
|  |     Source tarballs conventionally unpack into a directory that includes both | ||||||
|  |     the project name and a version string. We will also support searching up | ||||||
|  |     two directory levels for an appropriately named parent directory | ||||||
|  |  | ||||||
|  |     Returns a version dict whose "version" is the directory basename with | ||||||
|  |     `parentdir_prefix` removed; the revision id and date are None. | ||||||
|  |     Raises NotThisMethod if none of the three directories examined has a | ||||||
|  |     basename starting with `parentdir_prefix`. | ||||||
|  |     """ | ||||||
|  |     rootdirs = [] | ||||||
|  |  | ||||||
|  |     # Check `root` itself, then its parent, then its grandparent. | ||||||
|  |     for i in range(3): | ||||||
|  |         dirname = os.path.basename(root) | ||||||
|  |         if dirname.startswith(parentdir_prefix): | ||||||
|  |             return {"version": dirname[len(parentdir_prefix):], | ||||||
|  |                     "full-revisionid": None, | ||||||
|  |                     "dirty": False, "error": None, "date": None} | ||||||
|  |         else: | ||||||
|  |             # Remember what was tried, for the verbose message below. | ||||||
|  |             rootdirs.append(root) | ||||||
|  |             root = os.path.dirname(root)  # up a level | ||||||
|  |  | ||||||
|  |     if verbose: | ||||||
|  |         print("Tried directories %s but none started with prefix %s" % | ||||||
|  |               (str(rootdirs), parentdir_prefix)) | ||||||
|  |     raise NotThisMethod("rootdir doesn't start with parentdir_prefix") | ||||||
|  |  | ||||||
|  |  | ||||||
|  | @register_vcs_handler("git", "get_keywords") | ||||||
|  | def git_get_keywords(versionfile_abs): | ||||||
|  |     """Extract version information from the given file. | ||||||
|  |  | ||||||
|  |     Scans `versionfile_abs` line by line for the `git_refnames =`, | ||||||
|  |     `git_full =` and `git_date =` assignments and returns a dict holding | ||||||
|  |     whichever of the keys "refnames", "full" and "date" were found. | ||||||
|  |     If the file cannot be opened or read, the error is swallowed and the | ||||||
|  |     keywords collected so far (possibly none) are returned. | ||||||
|  |     """ | ||||||
|  |     # the code embedded in _version.py can just fetch the value of these | ||||||
|  |     # keywords. When used from setup.py, we don't want to import _version.py, | ||||||
|  |     # so we do it with a regexp instead. This function is not used from | ||||||
|  |     # _version.py. | ||||||
|  |     keywords = {} | ||||||
|  |     try: | ||||||
|  |         # NOTE(review): f is not closed if reading raises, because close() | ||||||
|  |         # sits inside the try body rather than in a finally/with. | ||||||
|  |         f = open(versionfile_abs, "r") | ||||||
|  |         for line in f.readlines(): | ||||||
|  |             # Each value is the double-quoted text after the "=" sign. | ||||||
|  |             if line.strip().startswith("git_refnames ="): | ||||||
|  |                 mo = re.search(r'=\s*"(.*)"', line) | ||||||
|  |                 if mo: | ||||||
|  |                     keywords["refnames"] = mo.group(1) | ||||||
|  |             if line.strip().startswith("git_full ="): | ||||||
|  |                 mo = re.search(r'=\s*"(.*)"', line) | ||||||
|  |                 if mo: | ||||||
|  |                     keywords["full"] = mo.group(1) | ||||||
|  |             if line.strip().startswith("git_date ="): | ||||||
|  |                 mo = re.search(r'=\s*"(.*)"', line) | ||||||
|  |                 if mo: | ||||||
|  |                     keywords["date"] = mo.group(1) | ||||||
|  |         f.close() | ||||||
|  |     except EnvironmentError: | ||||||
|  |         pass | ||||||
|  |     return keywords | ||||||
|  |  | ||||||
|  |  | ||||||
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Get version information from git keywords.

    'keywords' is a dict that may hold "refnames", "full" and "date"
    entries.  'tag_prefix' is the prefix a tag must carry to be treated
    as a version tag; it is stripped from the reported version.  If
    'verbose' is true, progress messages are printed.

    Returns a dict with the keys "version", "full-revisionid", "dirty",
    "error" and "date".

    Raises NotThisMethod if no keywords are available, if the "refnames"
    keyword is missing, or if the keywords were never expanded.
    """
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    if "refnames" not in keywords:
        # The keyword dict can be partially filled; without refnames
        # there is nothing to derive a tag from, so let the caller fall
        # back to another method instead of failing with a KeyError.
        raise NotThisMethod("no refnames keyword, cannot use keywords")
    date = keywords.get("date")
    if date is not None:
        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    # The full revision id is optional as well; report None when absent.
    full = keywords.get("full")
    if full is not None:
        full = full.strip()
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = set([r.strip() for r in refnames.strip("()").split(",")])
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = set([r for r in refs if re.search(r'\d', r)])
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            r = ref[len(tag_prefix):]
            if verbose:
                print("picking %s" % r)
            return {"version": r,
                    "full-revisionid": full,
                    "dirty": False, "error": None,
                    "date": date}
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {"version": "0+unknown",
            "full-revisionid": full,
            "dirty": False, "error": "no suitable tags", "date": None}
|  |  | ||||||
|  |  | ||||||
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.

    'tag_prefix' selects which tags are considered, 'root' is the
    directory git is run in, and 'run_command' is the callable used to
    invoke git; it is expected to return an (output, returncode) pair.

    Returns a dict of "pieces" ("long", "short", "error", "dirty",
    "closest-tag", "distance", "date").  When the describe output cannot
    be interpreted, the dict is returned early with "error" set and the
    later keys missing.

    Raises NotThisMethod if 'root' is not under git control or if any of
    the git commands produces no output.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
                          hide_stderr=True)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
                                          "--always", "--long",
                                          "--match", "%s*" % tag_prefix],
                                   cwd=root)
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[:git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
        if not mo:
            # unparseable. Maybe git-describe is misbehaving?
            pieces["error"] = ("unable to parse git-describe output: '%s'"
                               % describe_out)
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            # Build the message once; it is both printed and recorded.
            message = ("tag '%s' doesn't start with prefix '%s'"
                       % (full_tag, tag_prefix))
            if verbose:
                print(message)
            pieces["error"] = message
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix):]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
                                    cwd=root)
        if count_out is None:
            # Same failure convention as the describe/rev-parse calls
            # above, instead of crashing in int(None).
            raise NotThisMethod("'git rev-list' failed")
        pieces["distance"] = int(count_out)  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    date_out, rc = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
                               cwd=root)
    if date_out is None:
        raise NotThisMethod("'git show' failed")
    pieces["date"] = date_out.strip().replace(" ", "T", 1).replace(" ", "", 1)

    return pieces
|  |  | ||||||
|  |  | ||||||
def plus_or_dot(pieces):
    """Return a + if we don't already have one, else return a .

    'pieces' is the version-pieces dict.  Its "closest-tag" entry may be
    missing or None (no tag found); both cases are treated as a tag
    without a "+", so "+" is returned.
    """
    # dict.get() only applies its default when the key is absent, so an
    # explicit None value has to be normalised separately.
    closest_tag = pieces.get("closest-tag") or ""
    if "+" in closest_tag:
        return "."
    return "+"
|  |  | ||||||
|  |  | ||||||
def render_pep440(pieces):
    """Render TAG[+DISTANCE.gHEX[.dirty]] with a local version identifier.

    A tagged build that has been modified renders as TAG+0.gHEX.dirty.

    Exceptions:
    1: no tags. Renders 0+untagged.DISTANCE.gHEX[.dirty]
    """
    tag = pieces["closest-tag"]

    if not tag:
        # exception #1: nothing to anchor on, so start from "0"
        version = "0+untagged.%d.g%s" % (pieces["distance"],
                                         pieces["short"])
        if pieces["dirty"]:
            version += ".dirty"
        return version

    # An exact, clean tag is reported verbatim.
    if not (pieces["distance"] or pieces["dirty"]):
        return tag

    parts = [tag,
             plus_or_dot(pieces),
             "%d.g%s" % (pieces["distance"], pieces["short"])]
    if pieces["dirty"]:
        parts.append(".dirty")
    return "".join(parts)
|  |  | ||||||
|  |  | ||||||
def render_pep440_pre(pieces):
    """Render TAG[.post.devDISTANCE]; the dirty flag is ignored.

    Exceptions:
    1: no tags. Renders 0.post.devDISTANCE
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        return "0.post.dev%d" % pieces["distance"]

    distance = pieces["distance"]
    if not distance:
        return tag
    return tag + ".post.dev%d" % distance
|  |  | ||||||
|  |  | ||||||
def render_pep440_post(pieces):
    """Render TAG[.postDISTANCE[.dev0]+gHEX].

    ".dev0" marks a dirty tree.  Note that .dev0 sorts backwards (a
    dirty tree appears "older" than the corresponding clean one), but
    dirty trees should not be released anyway.

    Exceptions:
    1: no tags. Renders 0.postDISTANCE[.dev0]+gHEX
    """
    tag = pieces["closest-tag"]
    dev_marker = ".dev0"

    if not tag:
        # exception #1
        version = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            version += dev_marker
        return version + "+g%s" % pieces["short"]

    # An exact, clean tag is reported verbatim.
    if not (pieces["distance"] or pieces["dirty"]):
        return tag

    version = tag + ".post%d" % pieces["distance"]
    if pieces["dirty"]:
        version += dev_marker
    version += plus_or_dot(pieces)
    return version + "g%s" % pieces["short"]
|  |  | ||||||
|  |  | ||||||
def render_pep440_old(pieces):
    """Render TAG[.postDISTANCE[.dev0]].

    ".dev0" marks a dirty tree.

    Exceptions:
    1: no tags. Renders 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]

    if tag:
        # An exact, clean tag is reported verbatim.
        if not (pieces["distance"] or pieces["dirty"]):
            return tag
        base = tag
    else:
        # exception #1
        base = "0"

    version = base + ".post%d" % pieces["distance"]
    if pieces["dirty"]:
        version += ".dev0"
    return version
|  |  | ||||||
|  |  | ||||||
def render_git_describe(pieces):
    """Render TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. Renders HEX[-dirty]  (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]

    if not tag:
        # exception #1
        described = pieces["short"]
    elif pieces["distance"]:
        described = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        described = tag

    dirty_suffix = "-dirty" if pieces["dirty"] else ""
    return described + dirty_suffix
|  |  | ||||||
|  |  | ||||||
def render_git_describe_long(pieces):
    """Render TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always --long'.
    The distance/hash part is always present when a tag exists.

    Exceptions:
    1: no tags. Renders HEX[-dirty]  (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]

    if tag:
        described = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        described = pieces["short"]

    dirty_suffix = "-dirty" if pieces["dirty"] else ""
    return described + dirty_suffix
|  |  | ||||||
|  |  | ||||||
def render(pieces, style):
    """Turn version pieces into a version-info dict for the given style.

    If 'pieces' carries an error, a dict with version "unknown" and that
    error is returned without rendering.  An empty style or "default"
    selects "pep440".  Raises ValueError for an unrecognised style.
    """
    error = pieces["error"]
    if error:
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": error,
                "date": None}

    # Fall back to the default style when none was requested.
    chosen = style
    if not chosen or chosen == "default":
        chosen = "pep440"

    if chosen == "pep440":
        version = render_pep440(pieces)
    elif chosen == "pep440-pre":
        version = render_pep440_pre(pieces)
    elif chosen == "pep440-post":
        version = render_pep440_post(pieces)
    elif chosen == "pep440-old":
        version = render_pep440_old(pieces)
    elif chosen == "git-describe":
        version = render_git_describe(pieces)
    elif chosen == "git-describe-long":
        version = render_git_describe_long(pieces)
    else:
        raise ValueError("unknown style '%s'" % chosen)

    return {"version": version,
            "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"],
            "error": None,
            "date": pieces.get("date")}
|  |  | ||||||
|  |  | ||||||
def get_versions():
    """Get version information or return default if unable to do so.

    Tries, in order: expanded git keywords, 'git describe' on the source
    tree, and the parent directory name.  Each method signals that it
    does not apply by raising NotThisMethod.
    """
    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
    # __file__, we can work backwards from there to the root. Some
    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
    # case we can only use expanded keywords.

    def unknown(reason):
        # Fallback result shared by the failure paths below.
        return {"version": "0+unknown", "full-revisionid": None,
                "dirty": None,
                "error": reason,
                "date": None}

    cfg = get_config()
    verbose = cfg.verbose

    # Method 1: keywords expanded by git-archive.
    try:
        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
                                          verbose)
    except NotThisMethod:
        pass

    try:
        root = os.path.realpath(__file__)
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file. Invert
        # this to find the root from __file__: strip one path component
        # per component of versionfile_source.
        depth = len(cfg.versionfile_source.split('/'))
        for _ in range(depth):
            root = os.path.dirname(root)
    except NameError:
        return unknown("unable to find root of source tree")

    # Method 2: ask git directly.
    try:
        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
        return render(pieces, cfg.style)
    except NotThisMethod:
        pass

    # Method 3: derive the version from the parent directory name.
    try:
        if cfg.parentdir_prefix:
            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
    except NotThisMethod:
        pass

    return unknown("unable to compute version")
							
								
								
									
										4
									
								
								nilmdb/client/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								nilmdb/client/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,4 @@ | |||||||
|  | """nilmdb.client""" | ||||||
|  |  | ||||||
|  | from nilmdb.client.client import Client | ||||||
|  | from nilmdb.client.errors import ClientError, ServerError, Error | ||||||
							
								
								
									
										477
									
								
								nilmdb/client/client.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										477
									
								
								nilmdb/client/client.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,477 @@ | |||||||
|  | # -*- coding: utf-8 -*- | ||||||
|  |  | ||||||
|  | """Class for performing HTTP client requests via libcurl""" | ||||||
|  |  | ||||||
|  | import json | ||||||
|  | import contextlib | ||||||
|  |  | ||||||
|  | import nilmdb.utils | ||||||
|  | import nilmdb.client.httpclient | ||||||
|  | from nilmdb.client.errors import ClientError | ||||||
|  | from nilmdb.utils.time import timestamp_to_string, string_to_timestamp | ||||||
|  |  | ||||||
|  |  | ||||||
def extract_timestamp(line):
    """Return the timestamp parsed from the first whitespace-separated
    field of a line of data text"""
    first_field = line.split()[0]
    return string_to_timestamp(first_field)
|  |  | ||||||
|  |  | ||||||
class Client:
    """Main client interface to the Nilm database.

    Each method builds the request parameters for one server endpoint
    and forwards them to the underlying HTTP client object."""

    def __init__(self, url, post_json=False):
        """Set up the client for the server at 'url'.  When post_json
        is true, POST requests are sent with Content-Type
        'application/json' instead of the default
        'x-www-form-urlencoded'."""
        self.http = nilmdb.client.httpclient.HTTPClient(url, post_json)
        self.post_json = post_json

    # Context manager support: "with Client(url) as c: ..."
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def _json_post_param(self, data):
        """Return the parameter in the form needed for a POST: compact
        JSON text normally, or unchanged when posting as JSON"""
        if not self.post_json:
            return json.dumps(data, separators=(',', ':'))
        # The whole request body is JSON already; no extra encoding
        return data

    @staticmethod
    def _add_time_range(params, start, end):
        """Add 'start' and 'end' (when not None) to params as
        timestamp strings, in that order, and return params"""
        if start is not None:
            params["start"] = timestamp_to_string(start)
        if end is not None:
            params["end"] = timestamp_to_string(end)
        return params

    def close(self):
        """Close the connection; safe to call multiple times"""
        self.http.close()

    def geturl(self):
        """Return the base URL in use"""
        return self.http.baseurl

    def version(self):
        """Return the server version"""
        return self.http.get("version")

    def dbinfo(self):
        """Return server database info (path, size, free space)
        as a dictionary."""
        return self.http.get("dbinfo")

    def stream_list(self, path=None, layout=None, extended=False):
        """Return a sorted list of [path, layout] lists.  If 'path' or
        'layout' are given, only streams matching those exact values
        are returned.  If 'extended' is True, the returned lists carry
        extended info, e.g.: [path, layout, extent_min, extent_max,
        total_rows, total_seconds]."""
        query = {}
        for name, value in (("path", path), ("layout", layout)):
            if value is not None:
                query[name] = value
        if extended:
            query["extended"] = 1
        found = self.http.get("stream/list", query)
        return nilmdb.utils.sort.sort_human(found, key=lambda s: s[0])

    def stream_get_metadata(self, path, keys=None):
        """Get stream metadata, optionally limited to 'keys'"""
        query = {"path": path}
        if keys is not None:
            query["key"] = keys
        return self.http.get("stream/get_metadata", query)

    def stream_set_metadata(self, path, data):
        """Set stream metadata from a dictionary, replacing all
        existing metadata."""
        body = {"path": path, "data": self._json_post_param(data)}
        return self.http.post("stream/set_metadata", body)

    def stream_update_metadata(self, path, data):
        """Update stream metadata from a dictionary"""
        body = {"path": path, "data": self._json_post_param(data)}
        return self.http.post("stream/update_metadata", body)

    def stream_create(self, path, layout):
        """Create a new stream"""
        return self.http.post("stream/create",
                              {"path": path, "layout": layout})

    def stream_destroy(self, path):
        """Delete stream.  Fails if any data is still present."""
        return self.http.post("stream/destroy", {"path": path})

    def stream_rename(self, oldpath, newpath):
        """Rename a stream."""
        return self.http.post("stream/rename",
                              {"oldpath": oldpath, "newpath": newpath})

    def stream_remove(self, path, start=None, end=None):
        """Remove data from the specified time range and return the
        sum of the counts reported by the server"""
        query = self._add_time_range({"path": path}, start, end)
        return sum(int(count)
                   for count in self.http.post_gen("stream/remove", query))

    @contextlib.contextmanager
    def stream_insert_context(self, path, start=None, end=None):
        """Return a context manager that allows data to be efficiently
        inserted into a stream in a piecewise manner.  Data is
        provided as ASCII lines, and is aggregated and sent to the
        server in larger or smaller chunks as necessary.  Data lines
        must match the database layout for the given path, and end
        with a newline.

        Example:
          with client.stream_insert_context('/path', start, end) as ctx:
            ctx.insert('1234567890000000 1 2 3 4\\n')
            ctx.insert('1234567891000000 1 2 3 4\\n')

        For more details, see help for nilmdb.client.client.StreamInserter

        This may make multiple requests to the server, if the data is
        large enough or enough time has passed between insertions.
        """
        inserter = StreamInserter(self, path, start, end)
        yield inserter
        # Only reached when the with-body finished without raising
        inserter.finalize()
        inserter.destroy()

    def stream_insert(self, path, data, start=None, end=None):
        """Insert rows of data into a stream.  data should be a string
        or iterable that provides ASCII data that matches the database
        layout for path.  Data is passed through stream_insert_context,
        so it will be broken into reasonably-sized chunks and
        start/end will be deduced if missing."""
        with self.stream_insert_context(path, start, end) as ctx:
            # A bytes object is inserted whole; anything else is
            # iterated and inserted piece by piece.
            pieces = [data] if isinstance(data, bytes) else data
            for piece in pieces:
                ctx.insert(piece)
        return ctx.last_response

    def stream_insert_block(self, path, data, start, end, binary=False):
        """Insert a single fixed block of data into the stream.  It is
        sent directly to the server in one block with no further
        processing.

        If 'binary' is True, provide raw binary data in little-endian
        format matching the path layout, including an int64 timestamp.
        Otherwise, provide ASCII data matching the layout."""
        query = {"path": path}
        query["start"] = timestamp_to_string(start)
        query["end"] = timestamp_to_string(end)
        if binary:
            query["binary"] = 1
        return self.http.put("stream/insert", data, query)

    def stream_intervals(self, path, start=None, end=None, diffpath=None):
        """
        Return a generator that yields each stream interval.

        If 'diffpath' is not None, yields only interval ranges that are
        present in 'path' but not in 'diffpath'.
        """
        query = {"path": path}
        if diffpath is not None:
            query["diffpath"] = diffpath
        self._add_time_range(query, start, end)
        return self.http.get_gen("stream/intervals", query)

    def stream_extract(self, path, start=None, end=None,
                       count=False, markup=False, binary=False):
        """
        Extract data from a stream.  Returns a generator that yields
        lines of ASCII-formatted data that matches the database
        layout for the given path.

        If 'count' is True, return a count of matching data points
        rather than the actual data.  The output format is unchanged.

        If 'markup' is True, include comments in the returned data
        that indicate interval starts and ends.

        If 'binary' is True, return chunks of raw binary data, rather
        than lines of ASCII-formatted data.  Raw binary data is
        little-endian and matches the database types (including an
        int64 timestamp).
        """
        query = self._add_time_range({"path": path}, start, end)
        # Boolean options are sent as "1" only when enabled
        for option, enabled in (("count", count),
                                ("markup", markup),
                                ("binary", binary)):
            if enabled:
                query[option] = 1
        return self.http.get_gen("stream/extract", query, binary=binary)

    def stream_count(self, path, start=None, end=None):
        """
        Return the number of rows of data in the stream that satisfy
        the given timestamps.
        """
        results = list(self.stream_extract(path, start, end, count=True))
        return int(results[0])
|  |  | ||||||
|  |  | ||||||
|  | class StreamInserter(): | ||||||
|  |     """Object returned by stream_insert_context() that manages | ||||||
|  |     the insertion of rows of data into a particular path. | ||||||
|  |  | ||||||
|  |     The basic data flow is that we are filling a contiguous interval | ||||||
|  |     on the server, with no gaps, that extends from timestamp 'start' | ||||||
|  |     to timestamp 'end'.  Data timestamps satisfy 'start <= t < end'. | ||||||
|  |  | ||||||
|  |     Data is provided to .insert() as ASCII formatted data separated by | ||||||
|  |     newlines.  The chunks of data passed to .insert() do not need to | ||||||
|  |     match up with the newlines; less or more than one line can be passed. | ||||||
|  |  | ||||||
|  |     1. The first inserted line begins a new interval that starts at | ||||||
|  |     'start'.  If 'start' is not given, it is deduced from the first | ||||||
|  |     line's timestamp. | ||||||
|  |  | ||||||
|  |     2. Subsequent lines go into the same contiguous interval.  As lines | ||||||
|  |     are inserted, this routine may make multiple insertion requests to | ||||||
|  |     the server, but will structure the timestamps to leave no gaps. | ||||||
|  |  | ||||||
|  |     3. The current contiguous interval can be completed by manually | ||||||
|  |     calling .finalize(), which the context manager will also do | ||||||
|  |     automatically.  This will send any remaining data to the server, | ||||||
|  |     using the 'end' timestamp to end the interval.  If no 'end' | ||||||
|  |     was provided, it is deduced from the last timestamp seen, | ||||||
|  |     plus a small delta. | ||||||
|  |  | ||||||
|  |     After a .finalize(), inserting new data goes back to step 1. | ||||||
|  |  | ||||||
|  |     .update_start() can be called before step 1 to change the start | ||||||
|  |     time for the interval.  .update_end() can be called before step 3 | ||||||
|  |     to change the end time for the interval. | ||||||
|  |     """ | ||||||
|  |  | ||||||
|  |     # See design.md for a discussion of how much data to send.  This | ||||||
|  |     # is a soft limit -- we might send up to twice as much or so | ||||||
|  |     _max_data = 2 * 1024 * 1024 | ||||||
|  |     _max_data_after_send = 64 * 1024 | ||||||
|  |  | ||||||
|  |     def __init__(self, client, path, start, end): | ||||||
|  |         """'client' is the client object.  'path' is the database | ||||||
|  |         path to insert to.  'start' and 'end' are used for the first | ||||||
|  |         contiguous interval and may be None.""" | ||||||
|  |         self.last_response = None | ||||||
|  |  | ||||||
|  |         self._client = client | ||||||
|  |         self._path = path | ||||||
|  |  | ||||||
|  |         # Start and end for the overall contiguous interval we're | ||||||
|  |         # filling | ||||||
|  |         self._interval_start = start | ||||||
|  |         self._interval_end = end | ||||||
|  |  | ||||||
|  |         # Current data we're building up to send.  Each string | ||||||
|  |         # goes into the array, and gets joined all at once. | ||||||
|  |         self._block_data = [] | ||||||
|  |         self._block_len = 0 | ||||||
|  |  | ||||||
|  |         self.destroyed = False | ||||||
|  |  | ||||||
|  |     def destroy(self): | ||||||
|  |         """Ensure this object can't be used again without raising | ||||||
|  |         an error""" | ||||||
|  |         def error(*args, **kwargs): | ||||||
|  |             raise Exception("don't reuse this context object") | ||||||
|  |         self._send_block = self.insert = self.finalize = self.send = error | ||||||
|  |  | ||||||
|  |     def insert(self, data): | ||||||
|  |         """Insert a chunk of ASCII formatted data.  The | ||||||
|  |         overall data must consist of lines terminated by '\\n'. | ||||||
|  |  | ||||||
|  |         Chunks are buffered and only sent once the buffer reaches | ||||||
|  |         _max_data.  Raises ValueError if, after an intermediate send, | ||||||
|  |         _max_data_after_send or more is still accounted as buffered. | ||||||
|  |  | ||||||
|  |         NOTE(review): _send_block() joins the buffered chunks with | ||||||
|  |         b"".join(), so 'data' presumably has to be a bytes object | ||||||
|  |         rather than str -- confirm against callers.""" | ||||||
|  |         length = len(data) | ||||||
|  |         maxdata = self._max_data | ||||||
|  |  | ||||||
|  |         if length > maxdata: | ||||||
|  |             # This could make our buffer more than twice what we | ||||||
|  |             # wanted to send, so split it up.  This is a bit | ||||||
|  |             # inefficient, but the user really shouldn't be providing | ||||||
|  |             # this much data at once. | ||||||
|  |             # Each slice is at most maxdata long, so the recursive | ||||||
|  |             # calls never take this branch again. | ||||||
|  |             for cut in range(0, length, maxdata): | ||||||
|  |                 self.insert(data[cut:(cut + maxdata)]) | ||||||
|  |             return | ||||||
|  |  | ||||||
|  |         # Append this string to our list | ||||||
|  |         self._block_data.append(data) | ||||||
|  |         self._block_len += length | ||||||
|  |  | ||||||
|  |         # Send the block once we have enough data | ||||||
|  |         if self._block_len >= maxdata: | ||||||
|  |             self._send_block(final=False) | ||||||
|  |             # _send_block keeps whatever it could not send; if that is | ||||||
|  |             # still large, the input is assumed to be malformed. | ||||||
|  |             if self._block_len >= self._max_data_after_send: | ||||||
|  |                 raise ValueError("too much data left over after trying" | ||||||
|  |                                  " to send intermediate block; is it" | ||||||
|  |                                  " missing newlines or malformed?") | ||||||
|  |  | ||||||
|  |     def update_start(self, start): | ||||||
|  |         """Update the start time for the next contiguous interval. | ||||||
|  |         Call this before starting to insert data for a new interval, | ||||||
|  |         for example, after .finalize()""" | ||||||
|  |         self._interval_start = start | ||||||
|  |  | ||||||
|  |     def update_end(self, end): | ||||||
|  |         """Update the end time for the current contiguous interval. | ||||||
|  |         Call this before .finalize()""" | ||||||
|  |         self._interval_end = end | ||||||
|  |  | ||||||
|  |     def finalize(self): | ||||||
|  |         """Stop filling the current contiguous interval. | ||||||
|  |         All outstanding data will be sent, and the interval end | ||||||
|  |         time of the interval will be taken from the 'end' argument | ||||||
|  |         used when initializing this class, or the most recent | ||||||
|  |         value passed to update_end(), or the last timestamp plus | ||||||
|  |         a small epsilon value if no other endpoint was provided. | ||||||
|  |  | ||||||
|  |         If more data is inserted after a finalize(), it will become | ||||||
|  |         part of a new interval and there may be a gap left in-between.""" | ||||||
|  |         self._send_block(final=True) | ||||||
|  |  | ||||||
|  |     def send(self): | ||||||
|  |         """Send any data that we might have buffered up.  Does not affect | ||||||
|  |         any other treatment of timestamps or endpoints.""" | ||||||
|  |         self._send_block(final=False) | ||||||
|  |  | ||||||
|  |     def _get_first_noncomment(self, block): | ||||||
|  |         """Return the (start, end) indices of the first full line in | ||||||
|  |         block that isn't a comment, or raise IndexError if | ||||||
|  |         there isn't one. | ||||||
|  |  | ||||||
|  |         A full line is one terminated by a newline.  The returned | ||||||
|  |         'end' is one past that newline, so block[start:end] includes | ||||||
|  |         it.""" | ||||||
|  |         start = 0 | ||||||
|  |         while True: | ||||||
|  |             end = block.find(b'\n', start) | ||||||
|  |             if end < 0: | ||||||
|  |                 # No further newline-terminated line to examine | ||||||
|  |                 raise IndexError | ||||||
|  |             # A comment line is one whose first byte is '#' | ||||||
|  |             if block[start] != b'#'[0]: | ||||||
|  |                 return (start, (end + 1)) | ||||||
|  |             start = end + 1 | ||||||
|  |  | ||||||
|  |     def _get_last_noncomment(self, block): | ||||||
|  |         """Return the (start, end) indices of the last full line in | ||||||
|  |         block that isn't a comment, or raise IndexError if | ||||||
|  |         there isn't one. | ||||||
|  |  | ||||||
|  |         Only newline-terminated lines are considered.  The returned | ||||||
|  |         'end' is the index of the line's terminating newline, so | ||||||
|  |         block[start:end] does not include that newline.""" | ||||||
|  |         end = block.rfind(b'\n') | ||||||
|  |         if end <= 0: | ||||||
|  |             # No newline at all, or the only newline is the first byte | ||||||
|  |             raise IndexError | ||||||
|  |         while True: | ||||||
|  |             # Walk backwards one line at a time.  'start' is the newline | ||||||
|  |             # before the candidate line, or -1 at the top of the block. | ||||||
|  |             start = block.rfind(b'\n', 0, end) | ||||||
|  |             if block[start + 1] != b'#'[0]: | ||||||
|  |                 return ((start + 1), end) | ||||||
|  |             if start == -1: | ||||||
|  |                 raise IndexError | ||||||
|  |             end = start | ||||||
|  |  | ||||||
|  |     def _send_block(self, final=False): | ||||||
|  |         """Send data currently in the block.  The data sent will | ||||||
|  |         consist of full lines only, so some might be left over.""" | ||||||
|  |         # Build the full string to send | ||||||
|  |         block = b"".join(self._block_data) | ||||||
|  |  | ||||||
|  |         start_ts = self._interval_start | ||||||
|  |         if start_ts is None: | ||||||
|  |             # Pull start from the first line | ||||||
|  |             try: | ||||||
|  |                 (spos, epos) = self._get_first_noncomment(block) | ||||||
|  |                 start_ts = extract_timestamp(block[spos:epos]) | ||||||
|  |             except (ValueError, IndexError): | ||||||
|  |                 pass  # no timestamp is OK, if we have no data | ||||||
|  |  | ||||||
|  |         if final: | ||||||
|  |             # For a final block, it must end in a newline, and the | ||||||
|  |             # ending timestamp is either the user-provided end, | ||||||
|  |             # or the timestamp of the last line plus epsilon. | ||||||
|  |             end_ts = self._interval_end | ||||||
|  |             try: | ||||||
|  |                 if block[-1] != b'\n'[0]: | ||||||
|  |                     raise ValueError("final block didn't end with a newline") | ||||||
|  |                 if end_ts is None: | ||||||
|  |                     (spos, epos) = self._get_last_noncomment(block) | ||||||
|  |                     end_ts = extract_timestamp(block[spos:epos]) | ||||||
|  |                     end_ts += nilmdb.utils.time.epsilon | ||||||
|  |             except (ValueError, IndexError): | ||||||
|  |                 pass  # no timestamp is OK, if we have no data | ||||||
|  |             self._block_data = [] | ||||||
|  |             self._block_len = 0 | ||||||
|  |  | ||||||
|  |             # Next block is completely fresh | ||||||
|  |             self._interval_start = None | ||||||
|  |             self._interval_end = None | ||||||
|  |         else: | ||||||
|  |             # An intermediate block, e.g. "line1\nline2\nline3\nline4" | ||||||
|  |             # We need to save "line3\nline4" for the next block, and | ||||||
|  |             # use the timestamp from "line3" as the ending timestamp | ||||||
|  |             # for this one. | ||||||
|  |             try: | ||||||
|  |                 (spos, epos) = self._get_last_noncomment(block) | ||||||
|  |                 end_ts = extract_timestamp(block[spos:epos]) | ||||||
|  |             except (ValueError, IndexError): | ||||||
|  |                 # If we found no timestamp, give up; we could send this | ||||||
|  |                 # block later when we have more data. | ||||||
|  |                 return | ||||||
|  |             if spos == 0: | ||||||
|  |                 # Not enough data to send an intermediate block | ||||||
|  |                 return | ||||||
|  |             if self._interval_end is not None and end_ts > self._interval_end: | ||||||
|  |                 # User gave us bad endpoints; send it anyway, and let | ||||||
|  |                 # the server complain so that the error is the same | ||||||
|  |                 # as if we hadn't done this chunking. | ||||||
|  |                 end_ts = self._interval_end | ||||||
|  |             self._block_data = [block[spos:]] | ||||||
|  |             self._block_len = (epos - spos) | ||||||
|  |             block = block[:spos] | ||||||
|  |  | ||||||
|  |             # Next block continues where this one ended | ||||||
|  |             self._interval_start = end_ts | ||||||
|  |  | ||||||
|  |         # Double check endpoints | ||||||
|  |         if (start_ts is None or end_ts is None) or (start_ts == end_ts): | ||||||
|  |             # If the block has no non-comment lines, it's OK | ||||||
|  |             try: | ||||||
|  |                 self._get_first_noncomment(block) | ||||||
|  |             except IndexError: | ||||||
|  |                 return | ||||||
|  |             raise ClientError("have data to send, but no start/end times") | ||||||
|  |  | ||||||
|  |         # Send it | ||||||
|  |         self.last_response = self._client.stream_insert_block( | ||||||
|  |             self._path, block, start_ts, end_ts, binary=False) | ||||||
|  |  | ||||||
|  |         return | ||||||
							
								
								
									
										41
									
								
								nilmdb/client/errors.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								nilmdb/client/errors.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,41 @@ | |||||||
|  | """HTTP client errors""" | ||||||
|  |  | ||||||
|  | from nilmdb.utils.printf import sprintf | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class Error(Exception): | ||||||
|  |     """Base exception for both ClientError and ServerError responses""" | ||||||
|  |     def __init__(self, | ||||||
|  |                  status="Unspecified error", | ||||||
|  |                  message=None, | ||||||
|  |                  url=None, | ||||||
|  |                  traceback=None): | ||||||
|  |         # Only 'status' is handed to Exception.__init__; the other | ||||||
|  |         # fields are available as attributes. | ||||||
|  |         super().__init__(status) | ||||||
|  |         self.status = status     # e.g. "400 Bad Request" | ||||||
|  |         self.message = message   # textual message from the server | ||||||
|  |         self.url = url           # URL we were requesting | ||||||
|  |         self.traceback = traceback  # server traceback, if available | ||||||
|  |  | ||||||
|  |     def _format_error(self, show_url): | ||||||
|  |         """Return "[status]", followed by the message, the URL in | ||||||
|  |         parentheses (only if 'show_url' is true), and the server | ||||||
|  |         traceback on its own lines -- each part only when it is set.""" | ||||||
|  |         s = sprintf("[%s]", self.status) | ||||||
|  |         if self.message: | ||||||
|  |             s += sprintf(" %s", self.message) | ||||||
|  |         if show_url and self.url: | ||||||
|  |             s += sprintf(" (%s)", self.url) | ||||||
|  |         if self.traceback: | ||||||
|  |             s += sprintf("\nServer traceback:\n%s", self.traceback) | ||||||
|  |         return s | ||||||
|  |  | ||||||
|  |     def __str__(self): | ||||||
|  |         # The str() form leaves the URL out | ||||||
|  |         return self._format_error(show_url=False) | ||||||
|  |  | ||||||
|  |     def __repr__(self): | ||||||
|  |         # The repr() form includes the URL, when one was recorded | ||||||
|  |         return self._format_error(show_url=True) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class ClientError(Error): | ||||||
|  |     pass | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class ServerError(Error): | ||||||
|  |     pass | ||||||
							
								
								
									
										189
									
								
								nilmdb/client/httpclient.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										189
									
								
								nilmdb/client/httpclient.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,189 @@ | |||||||
|  | """HTTP client library""" | ||||||
|  |  | ||||||
|  | import json | ||||||
|  | import urllib.parse | ||||||
|  | import requests | ||||||
|  |  | ||||||
|  | from nilmdb.client.errors import ClientError, ServerError, Error | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class HTTPClient(): | ||||||
|  |     """Class to manage and perform HTTP requests from the client""" | ||||||
|  |     def __init__(self, baseurl="", post_json=False, verify_ssl=True): | ||||||
|  |         """If baseurl is supplied, all other functions that take | ||||||
|  |         a URL can be given a relative URL instead.""" | ||||||
|  |         # Verify / clean up URL | ||||||
|  |         reparsed = urllib.parse.urlparse(baseurl).geturl() | ||||||
|  |         if '://' not in reparsed: | ||||||
|  |             reparsed = urllib.parse.urlparse("http://" + baseurl).geturl() | ||||||
|  |         self.baseurl = reparsed.rstrip('/') + '/' | ||||||
|  |  | ||||||
|  |         # Note whether we want SSL verification | ||||||
|  |         self.verify_ssl = verify_ssl | ||||||
|  |  | ||||||
|  |         # Saved response, so that tests can verify a few things. | ||||||
|  |         self._last_response = {} | ||||||
|  |  | ||||||
|  |         # Whether to send application/json POST bodies (versus | ||||||
|  |         # x-www-form-urlencoded) | ||||||
|  |         self.post_json = post_json | ||||||
|  |  | ||||||
|  |     def _handle_error(self, url, code, body): | ||||||
|  |         # Default variables for exception.  We use the entire body as | ||||||
|  |         # the default message, in case we can't extract it from a JSON | ||||||
|  |         # response. | ||||||
|  |         args = { | ||||||
|  |             "url": url, | ||||||
|  |             "status": str(code), | ||||||
|  |             "message": body, | ||||||
|  |             "traceback": None | ||||||
|  |         } | ||||||
|  |         try: | ||||||
|  |             # Fill with server-provided data if we can | ||||||
|  |             jsonerror = json.loads(body) | ||||||
|  |             args["status"] = jsonerror["status"] | ||||||
|  |             args["message"] = jsonerror["message"] | ||||||
|  |             args["traceback"] = jsonerror["traceback"] | ||||||
|  |         except Exception: | ||||||
|  |             pass | ||||||
|  |         if 400 <= code <= 499: | ||||||
|  |             raise ClientError(**args) | ||||||
|  |         else: | ||||||
|  |             if 500 <= code <= 599: | ||||||
|  |                 if args["message"] is None: | ||||||
|  |                     args["message"] = ("(no message; try disabling " | ||||||
|  |                                        "response.stream option in " | ||||||
|  |                                        "nilmdb.server for better debugging)") | ||||||
|  |                 raise ServerError(**args) | ||||||
|  |             else: | ||||||
|  |                 raise Error(**args) | ||||||
|  |  | ||||||
|  |     def close(self): | ||||||
|  |         pass | ||||||
|  |  | ||||||
|  |     def _do_req(self, method, url, query_data, body_data, stream, headers): | ||||||
|  |         url = urllib.parse.urljoin(self.baseurl, url) | ||||||
|  |         try: | ||||||
|  |             # Create a new session, ensure we send "Connection: close", | ||||||
|  |             # and explicitly close connection after the transfer. | ||||||
|  |             # This is to avoid HTTP/1.1 persistent connections | ||||||
|  |             # (keepalive), because they have fundamental race | ||||||
|  |             # conditions when there are delays between requests: | ||||||
|  |             # a new request may be sent at the same instant that the | ||||||
|  |             # server decides to timeout the connection. | ||||||
|  |             session = requests.Session() | ||||||
|  |             if headers is None: | ||||||
|  |                 headers = {} | ||||||
|  |             headers["Connection"] = "close" | ||||||
|  |             response = session.request(method, url, | ||||||
|  |                                        params=query_data, | ||||||
|  |                                        data=body_data, | ||||||
|  |                                        stream=stream, | ||||||
|  |                                        headers=headers, | ||||||
|  |                                        verify=self.verify_ssl) | ||||||
|  |  | ||||||
|  |             # Close the connection.  If it's a generator (stream = | ||||||
|  |             # True), the requests library shouldn't actually close the | ||||||
|  |             # HTTP connection until all data has been read from the | ||||||
|  |             # response. | ||||||
|  |             session.close() | ||||||
|  |         except requests.RequestException as e: | ||||||
|  |             raise ServerError(status="502 Error", url=url, | ||||||
|  |                               message=str(e)) | ||||||
|  |         if response.status_code != 200: | ||||||
|  |             self._handle_error(url, response.status_code, response.content) | ||||||
|  |         self._last_response = response | ||||||
|  |         if response.headers["content-type"] in ("application/json", | ||||||
|  |                                                 "application/x-json-stream"): | ||||||
|  |             return (response, True) | ||||||
|  |         else: | ||||||
|  |             return (response, False) | ||||||
|  |  | ||||||
|  |     # Normal versions that return data directly | ||||||
|  |     def _req(self, method, url, query=None, body=None, headers=None): | ||||||
|  |         """ | ||||||
|  |         Make a request and return the body data as a string or parsed | ||||||
|  |         JSON object, or raise an error if it contained an error. | ||||||
|  |         """ | ||||||
|  |         (response, isjson) = self._do_req(method, url, query, body, | ||||||
|  |                                           stream=False, headers=headers) | ||||||
|  |         if isjson: | ||||||
|  |             return json.loads(response.content) | ||||||
|  |         return response.text | ||||||
|  |  | ||||||
|  |     def get(self, url, params=None): | ||||||
|  |         """Simple GET (parameters in URL)""" | ||||||
|  |         return self._req("GET", url, params, None) | ||||||
|  |  | ||||||
|  |     def post(self, url, params=None): | ||||||
|  |         """Simple POST (parameters in body)""" | ||||||
|  |         if self.post_json: | ||||||
|  |             return self._req("POST", url, None, | ||||||
|  |                              json.dumps(params), | ||||||
|  |                              {'Content-type': 'application/json'}) | ||||||
|  |         else: | ||||||
|  |             return self._req("POST", url, None, params) | ||||||
|  |  | ||||||
|  |     def put(self, url, data, params=None, | ||||||
|  |             content_type="application/octet-stream"): | ||||||
|  |         """Simple PUT (parameters in URL, data in body)""" | ||||||
|  |         h = {'Content-type': content_type} | ||||||
|  |         return self._req("PUT", url, query=params, body=data, headers=h) | ||||||
|  |  | ||||||
|  |     # Generator versions that return data one line at a time. | ||||||
|  |     def _req_gen(self, method, url, query=None, body=None, | ||||||
|  |                  headers=None, binary=False): | ||||||
|  |         """ | ||||||
|  |         Make a request and return a generator that gives back strings | ||||||
|  |         or JSON decoded lines of the body data, or raise an error if | ||||||
|  |         it contained an error. | ||||||
|  |  | ||||||
|  |         If 'binary' is true, raw chunks of the body are yielded | ||||||
|  |         without any line splitting.  Because this function is itself | ||||||
|  |         a generator, the request is not made (and no error is raised) | ||||||
|  |         until the first item is requested. | ||||||
|  |         """ | ||||||
|  |         (response, isjson) = self._do_req(method, url, query, body, | ||||||
|  |                                           stream=True, headers=headers) | ||||||
|  |  | ||||||
|  |         # Like the iter_lines function in Requests, but only splits on | ||||||
|  |         # the specified line ending. | ||||||
|  |         def lines(source, ending): | ||||||
|  |             # 'pending' holds an unterminated tail carried over to the | ||||||
|  |             # next chunk; None means nothing is carried over. | ||||||
|  |             pending = None | ||||||
|  |             for chunk in source: | ||||||
|  |                 if pending is not None: | ||||||
|  |                     chunk = pending + chunk | ||||||
|  |                 tmp = chunk.split(ending) | ||||||
|  |                 lines = tmp[:-1] | ||||||
|  |                 if chunk.endswith(ending): | ||||||
|  |                     pending = None | ||||||
|  |                 else: | ||||||
|  |                     pending = tmp[-1] | ||||||
|  |                 for line in lines: | ||||||
|  |                     yield line | ||||||
|  |             # Data after the last line ending is yielded as a final line | ||||||
|  |             if pending is not None: | ||||||
|  |                 yield pending | ||||||
|  |  | ||||||
|  |         # Yield the chunks or lines as requested | ||||||
|  |         if binary: | ||||||
|  |             for chunk in response.iter_content(chunk_size=65536): | ||||||
|  |                 yield chunk | ||||||
|  |         elif isjson: | ||||||
|  |             # JSON responses are read one byte at a time and split on | ||||||
|  |             # CRLF; each line is decoded as its own JSON document. | ||||||
|  |             for line in lines(response.iter_content(chunk_size=1), | ||||||
|  |                               ending=b'\r\n'): | ||||||
|  |                 yield json.loads(line) | ||||||
|  |         else: | ||||||
|  |             for line in lines(response.iter_content(chunk_size=65536), | ||||||
|  |                               ending=b'\n'): | ||||||
|  |                 yield line | ||||||
|  |  | ||||||
|  |     def get_gen(self, url, params=None, binary=False): | ||||||
|  |         """Simple GET (parameters in URL) returning a generator""" | ||||||
|  |         return self._req_gen("GET", url, params, binary=binary) | ||||||
|  |  | ||||||
|  |     def post_gen(self, url, params=None): | ||||||
|  |         """Simple POST (parameters in body) returning a generator""" | ||||||
|  |         if self.post_json: | ||||||
|  |             return self._req_gen("POST", url, None, | ||||||
|  |                                  json.dumps(params), | ||||||
|  |                                  {'Content-type': 'application/json'}) | ||||||
|  |         else: | ||||||
|  |             return self._req_gen("POST", url, None, params) | ||||||
|  |  | ||||||
|  |     # Not much use for a POST or PUT generator, since they don't | ||||||
|  |     # return much data. | ||||||
							
								
								
									
										263
									
								
								nilmdb/client/numpyclient.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										263
									
								
								nilmdb/client/numpyclient.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,263 @@ | |||||||
|  | # -*- coding: utf-8 -*- | ||||||
|  |  | ||||||
|  | """Provide a NumpyClient class that is based on normal Client, but has | ||||||
|  | additional methods for extracting and inserting data via Numpy arrays.""" | ||||||
|  |  | ||||||
|  | import contextlib | ||||||
|  |  | ||||||
|  | import numpy | ||||||
|  |  | ||||||
|  | import nilmdb.utils | ||||||
|  | import nilmdb.client.client | ||||||
|  | import nilmdb.client.httpclient | ||||||
|  | from nilmdb.client.errors import ClientError | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def layout_to_dtype(layout): | ||||||
|  |     ltype = layout.split('_')[0] | ||||||
|  |     lcount = int(layout.split('_')[1]) | ||||||
|  |     if ltype.startswith('int'): | ||||||
|  |         atype = '<i' + str(int(ltype[3:]) // 8) | ||||||
|  |     elif ltype.startswith('uint'): | ||||||
|  |         atype = '<u' + str(int(ltype[4:]) // 8) | ||||||
|  |     elif ltype.startswith('float'): | ||||||
|  |         atype = '<f' + str(int(ltype[5:]) // 8) | ||||||
|  |     else: | ||||||
|  |         raise ValueError("bad layout") | ||||||
|  |     if lcount == 1: | ||||||
|  |         dtype = [('timestamp', '<i8'), ('data', atype)] | ||||||
|  |     else: | ||||||
|  |         dtype = [('timestamp', '<i8'), ('data', atype, lcount)] | ||||||
|  |     return numpy.dtype(dtype) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class NumpyClient(nilmdb.client.client.Client): | ||||||
|  |     """Subclass of nilmdb.client.Client that adds additional methods for | ||||||
|  |     extracting and inserting data via Numpy arrays.""" | ||||||
|  |  | ||||||
|  |     def _get_dtype(self, path, layout): | ||||||
|  |         """Return the Numpy dtype to use for 'path'. | ||||||
|  |  | ||||||
|  |         If 'layout' is None it is looked up with stream_list(path), | ||||||
|  |         which must return exactly one entry or ClientError is raised; | ||||||
|  |         the layout is taken from index 1 of that entry.  Otherwise | ||||||
|  |         the given layout is used as-is.""" | ||||||
|  |         if layout is None: | ||||||
|  |             streams = self.stream_list(path) | ||||||
|  |             if len(streams) != 1: | ||||||
|  |                 raise ClientError("can't get layout for path: " + path) | ||||||
|  |             layout = streams[0][1] | ||||||
|  |         return layout_to_dtype(layout) | ||||||
|  |  | ||||||
|  |     def stream_extract_numpy(self, path, start=None, end=None, | ||||||
|  |                              layout=None, maxrows=100000, | ||||||
|  |                              structured=False): | ||||||
|  |         """ | ||||||
|  |         Extract data from a stream.  Returns a generator that yields | ||||||
|  |         Numpy arrays of up to 'maxrows' of data each. | ||||||
|  |  | ||||||
|  |         If 'layout' is None, it is read using stream_info. | ||||||
|  |  | ||||||
|  |         If 'structured' is False, all data is converted to float64 | ||||||
|  |         and returned in a flat 2D array.  Otherwise, data is returned | ||||||
|  |         as a structured dtype in a 1D array. | ||||||
|  |         """ | ||||||
|  |         dtype = self._get_dtype(path, layout) | ||||||
|  |  | ||||||
|  |         def to_numpy(data): | ||||||
|  |             a = numpy.frombuffer(data, dtype) | ||||||
|  |             if structured: | ||||||
|  |                 return a | ||||||
|  |             return numpy.c_[a['timestamp'], a['data']] | ||||||
|  |  | ||||||
|  |         chunks = [] | ||||||
|  |         total_len = 0 | ||||||
|  |         maxsize = dtype.itemsize * maxrows | ||||||
|  |         for data in self.stream_extract(path, start, end, binary=True): | ||||||
|  |             # Add this block of binary data | ||||||
|  |             chunks.append(data) | ||||||
|  |             total_len += len(data) | ||||||
|  |  | ||||||
|  |             # See if we have enough to make the requested Numpy array | ||||||
|  |             while total_len >= maxsize: | ||||||
|  |                 assembled = b"".join(chunks) | ||||||
|  |                 total_len -= maxsize | ||||||
|  |                 chunks = [assembled[maxsize:]] | ||||||
|  |                 block = assembled[:maxsize] | ||||||
|  |                 yield to_numpy(block) | ||||||
|  |  | ||||||
|  |         if total_len: | ||||||
|  |             yield to_numpy(b"".join(chunks)) | ||||||
|  |  | ||||||
|  |     @contextlib.contextmanager | ||||||
|  |     def stream_insert_numpy_context(self, path, start=None, end=None, | ||||||
|  |                                     layout=None): | ||||||
|  |         """Return a context manager that allows data to be efficiently | ||||||
|  |         inserted into a stream in a piecewise manner.  Data is | ||||||
|  |         provided as Numpy arrays, and is aggregated and sent to the | ||||||
|  |         server in larger or smaller chunks as necessary.  Data format | ||||||
|  |         must match the database layout for the given path. | ||||||
|  |  | ||||||
|  |         For more details, see help for | ||||||
|  |         nilmdb.client.numpyclient.StreamInserterNumpy | ||||||
|  |  | ||||||
|  |         If 'layout' is not None, use it as the layout rather than | ||||||
|  |         querying the database. | ||||||
|  |  | ||||||
|  |         The object bound by the 'with' statement is a | ||||||
|  |         StreamInserterNumpy.  It is finalized and then destroyed when | ||||||
|  |         the 'with' body completes normally. | ||||||
|  |         """ | ||||||
|  |         dtype = self._get_dtype(path, layout) | ||||||
|  |         ctx = StreamInserterNumpy(self, path, start, end, dtype) | ||||||
|  |         yield ctx | ||||||
|  |         # There is no try/finally around the yield, so these two calls | ||||||
|  |         # are skipped if the 'with' body raises an exception. | ||||||
|  |         ctx.finalize() | ||||||
|  |         ctx.destroy() | ||||||
|  |  | ||||||
|  |     def stream_insert_numpy(self, path, data, start=None, end=None, | ||||||
|  |                             layout=None): | ||||||
|  |         """Insert data into a stream.  data should be a Numpy array | ||||||
|  |         which will be passed through stream_insert_numpy_context to | ||||||
|  |         break it into chunks etc.  See the help for that function | ||||||
|  |         for details.""" | ||||||
|  |         with self.stream_insert_numpy_context(path, start, end, layout) as ctx: | ||||||
|  |             if isinstance(data, numpy.ndarray): | ||||||
|  |                 ctx.insert(data) | ||||||
|  |             else: | ||||||
|  |                 for chunk in data: | ||||||
|  |                     ctx.insert(chunk) | ||||||
|  |         return ctx.last_response | ||||||
|  |  | ||||||
|  |  | ||||||
class StreamInserterNumpy(nilmdb.client.client.StreamInserter):
    """Object returned by stream_insert_numpy_context() that manages
    the insertion of rows of data into a particular path.

    See help for nilmdb.client.client.StreamInserter for details.
    The only difference is that, instead of ASCII formatted data,
    this context manager can take Numpy arrays, which are either
    structured (1D with complex dtype) or flat (2D with simple dtype).

    Arrays handed to insert() are accumulated in a list and sent to
    the server by _send_block() once enough rows have been collected.
    """

    # Soft limit of how many bytes to send per HTTP request.
    _max_data = 2 * 1024 * 1024

    def __init__(self, client, path, start, end, dtype):
        """
        'client' is the client object.  'path' is the database path
        to insert to.  'start' and 'end' are used for the first
        contiguous interval and may be None.  'dtype' is the Numpy
        dtype for this stream.
        """
        super(StreamInserterNumpy, self).__init__(client, path, start, end)
        self._dtype = dtype

        # Max rows to send at once
        self._max_rows = self._max_data // self._dtype.itemsize

        # List of the current arrays we're building up to send
        self._block_arrays = []
        self._block_rows = 0

    def insert(self, array):
        """Insert Numpy data, which must match the layout type.

        'array' may be a 1D structured array whose dtype equals the
        stream dtype, or a 2D array whose first column is the
        timestamp and whose remaining columns are the data.  Anything
        that is not already an ndarray is converted with numpy.array().

        Raises ValueError if the dtype, number of columns, or number
        of dimensions does not fit the stream.
        """
        if not isinstance(array, numpy.ndarray):
            array = numpy.array(array)
        if array.ndim == 1:
            # Already a structured array; just verify the type
            if array.dtype != self._dtype:
                raise ValueError("wrong dtype for 1D (structured) array")
        elif array.ndim == 2:
            # Convert to structured array
            sarray = numpy.zeros(array.shape[0], dtype=self._dtype)
            try:
                sarray['timestamp'] = array[:, 0]
                # Need the squeeze in case sarray['data'] is 1 dimensional
                sarray['data'] = numpy.squeeze(array[:, 1:])
            except (IndexError, ValueError):
                raise ValueError("wrong number of fields for this data type")
            array = sarray
        else:
            raise ValueError("wrong number of dimensions in array")

        length = len(array)
        maxrows = self._max_rows

        if length == 0:
            return
        if length > maxrows:
            # This is more than we want to send in a single request,
            # so split it up.  This is a bit inefficient, but the user
            # really shouldn't be providing this much data at once.
            for cut in range(0, length, maxrows):
                self.insert(array[cut:(cut + maxrows)])
            return

        # Add this array to our list
        self._block_arrays.append(array)
        self._block_rows += length

        # Send if it's too long
        if self._block_rows >= maxrows:
            self._send_block(final=False)

    def _send_block(self, final=False):
        """Send the data currently stored up.  One row might be left
        over if we need its timestamp saved.

        With final=True everything buffered is sent and the interval
        state is reset.  With final=False the last buffered row is
        held back and its timestamp is used as the end of the block
        that is sent.

        Raises ClientError if there is data to send but the start/end
        times are missing or equal.
        """

        # Build the full array to send
        if self._block_rows == 0:
            array = numpy.zeros(0, dtype=self._dtype)
        else:
            array = numpy.hstack(self._block_arrays)

        # Get starting timestamp
        start_ts = self._interval_start
        if start_ts is None:
            # Pull start from the first row
            try:
                start_ts = array['timestamp'][0]
            except IndexError:
                pass  # no timestamp is OK, if we have no data

        # Get ending timestamp
        if final:
            # For a final block, the timestamp is either the
            # user-provided end, or the timestamp of the last line
            # plus epsilon.
            end_ts = self._interval_end
            if end_ts is None:
                try:
                    end_ts = array['timestamp'][-1]
                    end_ts += nilmdb.utils.time.epsilon
                except IndexError:
                    pass  # no timestamp is OK, if we have no data
            self._block_arrays = []
            self._block_rows = 0

            # Next block is completely fresh
            self._interval_start = None
            self._interval_end = None
        else:
            # An intermediate block.  We need to save the last row
            # for the next block, and use its timestamp as the ending
            # timestamp for this one.
            if len(array) < 2:
                # Not enough data to send an intermediate block
                return
            end_ts = array['timestamp'][-1]
            if self._interval_end is not None and end_ts > self._interval_end:
                # User gave us bad endpoints; send it anyway, and let
                # the server complain so that the error is the same
                # as if we hadn't done this chunking.
                end_ts = self._interval_end
            self._block_arrays = [array[-1:]]
            self._block_rows = 1
            array = array[:-1]

            # Next block continues where this one ended
            self._interval_start = end_ts

        # If we have no endpoints, or equal endpoints, it's OK as long
        # as there's no data to send
        if (start_ts is None or end_ts is None) or (start_ts == end_ts):
            # Test the length explicitly: the truth value of a Numpy
            # array is ambiguous (it raises for multi-row arrays), which
            # would hide the ClientError below behind a ValueError.
            if len(array) == 0:
                return
            raise ClientError("have data to send, but invalid start/end times")

        # Send it.  tobytes() is the supported spelling; tostring() is
        # deprecated and no longer exists in Numpy 2.
        data = array.tobytes()
        self.last_response = self._client.stream_insert_block(
            self._path, data, start_ts, end_ts, binary=True)

        return
							
								
								
									
										3
									
								
								nilmdb/cmdline/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								nilmdb/cmdline/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | |||||||
|  | """nilmdb.cmdline""" | ||||||
|  |  | ||||||
|  | from nilmdb.cmdline.cmdline import Cmdline | ||||||
							
								
								
									
										173
									
								
								nilmdb/cmdline/cmdline.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										173
									
								
								nilmdb/cmdline/cmdline.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,173 @@ | |||||||
|  | """Command line client functionality""" | ||||||
|  |  | ||||||
|  | import os | ||||||
|  | import sys | ||||||
|  | import signal | ||||||
|  | import argparse | ||||||
|  | from argparse import ArgumentDefaultsHelpFormatter as def_form | ||||||
|  |  | ||||||
|  | import nilmdb.client | ||||||
|  | from nilmdb.utils.printf import fprintf, sprintf | ||||||
|  | import nilmdb.utils.time | ||||||
|  |  | ||||||
|  | import argcomplete | ||||||
|  | import datetime_tz | ||||||
|  |  | ||||||
# Names of every valid subcommand.  Each one is implemented in its own
# module purely to keep the files small -- the functions in those
# modules are still called with the Cmdline instance as self.
subcommands = [
    "help", "info", "create", "rename", "list", "intervals",
    "metadata", "insert", "extract", "remove", "destroy",
]

# Map each subcommand name to its imported module object
subcmd_mods = {}
for cmd in subcommands:
    subcmd_mods[cmd] = __import__("nilmdb.cmdline.%s" % cmd, fromlist=[cmd])
|  |  | ||||||
|  |  | ||||||
class JimArgumentParser(argparse.ArgumentParser):
    """ArgumentParser that accepts --version anywhere on the command
    line and reports errors as a usage line followed by a single
    "error: ..." message."""

    def parse_args(self, args=None, namespace=None):
        """Parse 'args' (or sys.argv[1:] when 'args' is None).

        If "--version" appears anywhere in the arguments, the whole
        argument list is replaced by just ["--version"].
        """
        # Look for --version anywhere and change it to just "nilmtool
        # --version".  This makes "nilmtool cmd --version" work, which
        # is needed by help2man.
        #
        # Only fall back to sys.argv when no argument list was given at
        # all; an explicitly passed empty list means "no arguments" and
        # must not be replaced by whatever is in sys.argv.
        effective = sys.argv[1:] if args is None else args
        if "--version" in effective:
            args = ["--version"]
        return argparse.ArgumentParser.parse_args(self, args, namespace)

    def error(self, message):
        """Print the usage text to stderr and exit with status 2 and
        the given error message."""
        self.print_usage(sys.stderr)
        self.exit(2, sprintf("error: %s\n", message))
|  |  | ||||||
|  |  | ||||||
class Complete:
    """Completion helpers, for using argcomplete (see
    extras/nilmtool-bash-completion.sh).

    Every completer takes the text typed so far ('prefix') and the
    arguments parsed so far ('parsed_args'), and returns an iterable
    of candidate strings.
    """

    def escape(self, s):
        """Return 's' with a backslash placed in front of every
        backslash, double quote, single quote and space."""
        special = "\\\"' "
        return "".join("\\" + ch if ch in special else ch for ch in s)

    def none(self, prefix, parsed_args, **kwargs):
        """Completer that never offers any candidates."""
        return []

    # These argument kinds have no completion candidates
    rate = none
    time = none
    url = none

    def path(self, prefix, parsed_args, **kwargs):
        """Offer the (escaped) stream paths on the server at
        parsed_args.url that start with 'prefix'."""
        server = nilmdb.client.Client(parsed_args.url)
        names = (entry[0] for entry in server.stream_list())
        return (self.escape(name) for name in names
                if name.startswith(prefix))

    def layout(self, prefix, parsed_args, **kwargs):
        """Offer layout names of the form type_count, for counts 1
        through 9, that start with 'prefix'."""
        types = ("int8", "int16", "int32", "int64",
                 "uint8", "uint16", "uint32", "uint64",
                 "float32", "float64")
        candidates = [kind + "_" + str(count)
                      for count in range(1, 10)
                      for kind in types]
        return (name for name in candidates if name.startswith(prefix))

    def meta_key(self, prefix, parsed_args, **kwargs):
        """Offer just the key part (text before the first '=') of each
        candidate produced by meta_keyval()."""
        pairs = self.meta_keyval(prefix, parsed_args, **kwargs)
        return (pair.partition('=')[0] for pair in pairs)

    def meta_keyval(self, prefix, parsed_args, **kwargs):
        """Offer escaped "key=value" strings, built from the metadata
        of the stream at parsed_args.path, that start with 'prefix'.
        Returns an empty list when no path has been given."""
        server = nilmdb.client.Client(parsed_args.url)
        stream = parsed_args.path
        if not stream:
            return []
        metadata = server.stream_get_metadata(stream)
        escaped = (self.escape(key + '=' + value)
                   for (key, value) in metadata.items())
        return [item for item in escaped if item.startswith(prefix)]
|  |  | ||||||
|  |  | ||||||
class Cmdline:
    """Command line driver: builds the argument parser, connects to
    the server, and dispatches to the selected subcommand handler."""

    def __init__(self, argv=None):
        """'argv' is the list of command line arguments to parse;
        sys.argv[1:] is used when it is not provided."""
        self.argv = argv or sys.argv[1:]
        self.client = None
        # Default server URL; may be overridden by the environment
        self.def_url = os.environ.get("NILMDB_URL", "http://localhost/nilmdb/")
        # Subparser for each subcommand, filled in by parser_setup()
        self.subcmd = {}
        self.complete = Complete()
        self.complete_output_stream = None  # overridden by test suite

    def arg_time(self, toparse):
        """Parse a time string argument.  A ValueError from the time
        parser is turned into an argparse.ArgumentTypeError."""
        try:
            parsed = nilmdb.utils.time.parse_time(toparse)
        except ValueError as err:
            detail = sprintf("%s \"%s\"", str(err), toparse)
            raise argparse.ArgumentTypeError(detail)
        return parsed

    def parser_setup(self):
        """Build self.parser, including one subparser per subcommand
        (stored in self.subcmd)."""
        parser = JimArgumentParser(add_help=False,
                                   formatter_class=def_form)
        self.parser = parser

        general = parser.add_argument_group("General options")
        general.add_argument("-h", "--help", action='help',
                             help='show this help message and exit')
        general.add_argument("-v", "--version", action="version",
                             version=nilmdb.__version__)

        server = parser.add_argument_group("Server")
        url_option = server.add_argument(
            "-u", "--url", action="store",
            default=self.def_url,
            help="NilmDB server URL (default: %(default)s)")
        url_option.completer = self.complete.url

        commands = parser.add_subparsers(
            title="Commands", dest="command", required=True,
            description="Use 'help command' or 'command --help' for more "
                        "details on a particular command.")

        # Each subcommand module registers its own subparser
        for name in subcommands:
            self.subcmd[name] = subcmd_mods[name].setup(self, commands)

    def die(self, formatstr, *args):
        """Print a formatted message to stderr, close the client if
        there is one, and exit with status -1."""
        fprintf(sys.stderr, formatstr + "\n", *args)
        client = self.client
        if client:
            client.close()
        sys.exit(-1)

    def run(self):
        """Parse the arguments, run the selected subcommand, and exit
        with its return value (0 if it returned nothing)."""
        # Set SIGPIPE to its default handler -- we don't need Python
        # to catch it for us.
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)

        # Clear cached timezone, so that we can pick up timezone changes
        # while running this from the test suite.
        datetime_tz._localtz = None

        # Build the parser, handle shell completion, then parse
        self.parser_setup()
        argcomplete.autocomplete(self.parser, exit_method=sys.exit,
                                 output_stream=self.complete_output_stream)
        self.args = self.parser.parse_args(self.argv)

        # Subcommands may register an extra argument check
        if "verify" in self.args:
            self.args.verify(self)

        self.client = nilmdb.client.Client(self.args.url)

        # Make a test connection to make sure things work,
        # unless the particular command requests that we don't.
        wants_test_connection = "no_test_connect" not in self.args
        if wants_test_connection:
            try:
                self.client.version()
            except nilmdb.client.Error as err:
                self.die("error connecting to server: %s", str(err))

        # Now dispatch client request to appropriate function.  Parser
        # should have ensured that we don't have any unknown commands
        # here.
        exit_code = self.args.handler(self)
        if not exit_code:
            exit_code = 0

        self.client.close()
        sys.exit(exit_code)
							
								
								
									
										38
									
								
								nilmdb/cmdline/create.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								nilmdb/cmdline/create.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,38 @@ | |||||||
|  | from argparse import RawDescriptionHelpFormatter as raw_form | ||||||
|  |  | ||||||
|  | import nilmdb.client | ||||||
|  |  | ||||||
|  |  | ||||||
def setup(self, sub):
    """Register the 'create' subcommand on the subparsers object
    'sub' and return the new subparser.  Completers for the arguments
    are taken from self.complete."""
    description = """
Create a new empty stream at the specified path and with the specified
layout type.

Layout types are of the format: type_count

  'type' is a data type like 'float32', 'float64', 'uint16', 'int32', etc.

  'count' is the number of columns of this type.

  For example, 'float32_8' means the data for this stream has 8 columns of
  32-bit floating point values.
"""
    cmd = sub.add_parser("create", help="Create a new stream",
                         formatter_class=raw_form,
                         description=description)
    cmd.set_defaults(handler=cmd_create)

    required = cmd.add_argument_group("Required arguments")
    path_arg = required.add_argument(
        "path",
        help="Path (in database) of new stream, e.g. /foo/bar")
    path_arg.completer = self.complete.path
    layout_arg = required.add_argument(
        "layout",
        help="Layout type for new stream, e.g. float32_8")
    layout_arg.completer = self.complete.layout
    return cmd
|  |  | ||||||
|  |  | ||||||
def cmd_create(self):
    """Create a new stream at self.args.path with layout
    self.args.layout; a client error is reported through self.die()."""
    args = self.args
    try:
        self.client.stream_create(args.path, args.layout)
    except nilmdb.client.ClientError as err:
        self.die("error creating stream: %s", str(err))
							
								
								
									
										52
									
								
								nilmdb/cmdline/destroy.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								nilmdb/cmdline/destroy.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,52 @@ | |||||||
|  | import fnmatch | ||||||
|  |  | ||||||
|  | from argparse import ArgumentDefaultsHelpFormatter as def_form | ||||||
|  |  | ||||||
|  | from nilmdb.utils.printf import printf | ||||||
|  | import nilmdb.client | ||||||
|  |  | ||||||
|  |  | ||||||
def setup(self, sub):
    """Register the 'destroy' subcommand on the subparsers object
    'sub' and return the new subparser."""
    description = """
                         Destroy the stream at the specified path.
                         The stream must be empty.  All metadata
                         related to the stream is permanently deleted.

                         Wildcards and multiple paths are supported.
                         """
    cmd = sub.add_parser("destroy", help="Delete a stream and all data",
                         formatter_class=def_form,
                         description=description)
    cmd.set_defaults(handler=cmd_destroy)

    options = cmd.add_argument_group("Options")
    options.add_argument(
        "-R", "--remove", action="store_true",
        help="Remove all data before destroying stream")
    options.add_argument(
        "-q", "--quiet", action="store_true",
        help="Don't display names when destroying multiple paths")

    required = cmd.add_argument_group("Required arguments")
    path_arg = required.add_argument(
        "path", nargs='+',
        help="Path of the stream to delete, e.g. /foo/bar/*")
    path_arg.completer = self.complete.path
    return cmd
|  |  | ||||||
|  |  | ||||||
def cmd_destroy(self):
    """Destroy every stream that matches one of the path patterns in
    self.args.path.

    Each pattern is matched against the server's stream list with
    fnmatch; a pattern that matches nothing is reported through
    self.die().  With --remove, the stream's data is removed before
    the stream is destroyed.  Client errors are reported through
    self.die().
    """
    streams = [s[0] for s in self.client.stream_list()]

    # Collect the matching streams in order.  Overlapping patterns
    # (e.g. "/foo/*" and "/foo/bar") may match the same stream more
    # than once; keep only the first occurrence so that we don't try
    # to destroy a stream that we have already destroyed.
    paths = []
    seen = set()
    for pattern in self.args.path:
        matched = fnmatch.filter(streams, pattern)
        if not matched:
            self.die("error: no stream matched path: %s", pattern)
        for name in matched:
            if name not in seen:
                seen.add(name)
                paths.append(name)

    for path in paths:
        # Only announce names when there is more than one to destroy
        if not self.args.quiet and len(paths) > 1:
            printf("Destroying %s\n", path)

        try:
            if self.args.remove:
                self.client.stream_remove(path)
            self.client.stream_destroy(path)
        except nilmdb.client.ClientError as e:
            self.die("error destroying stream: %s", str(e))
							
								
								
									
										96
									
								
								nilmdb/cmdline/extract.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										96
									
								
								nilmdb/cmdline/extract.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,96 @@ | |||||||
|  | import sys | ||||||
|  |  | ||||||
|  | from nilmdb.utils.printf import printf | ||||||
|  | import nilmdb.client | ||||||
|  |  | ||||||
|  |  | ||||||
def setup(self, sub):
    """Register the 'extract' subcommand on the subparsers object
    'sub' and return the new subparser.  Time arguments are parsed
    with self.arg_time."""
    description = """
                         Extract data from a stream.
                         """
    cmd = sub.add_parser("extract", help="Extract data",
                         description=description)
    cmd.set_defaults(verify=cmd_extract_verify,
                     handler=cmd_extract)

    selection = cmd.add_argument_group("Data selection")
    path_arg = selection.add_argument(
        "path",
        help="Path of stream, e.g. /foo/bar")
    path_arg.completer = self.complete.path
    start_arg = selection.add_argument(
        "-s", "--start", required=True,
        metavar="TIME", type=self.arg_time,
        help="Starting timestamp (free-form, inclusive)")
    start_arg.completer = self.complete.time
    end_arg = selection.add_argument(
        "-e", "--end", required=True,
        metavar="TIME", type=self.arg_time,
        help="Ending timestamp (free-form, noninclusive)")
    end_arg.completer = self.complete.time

    # All output format options are simple on/off flags
    output = cmd.add_argument_group("Output format")
    flags = [
        ("-B", "--binary", "Raw binary output"),
        ("-b", "--bare", "Exclude timestamps from output lines"),
        ("-a", "--annotate",
         "Include comments with some information about the stream"),
        ("-m", "--markup",
         "Include comments with interval starts and ends"),
        ("-T", "--timestamp-raw",
         "Show raw timestamps in annotated information"),
        ("-c", "--count",
         "Just output a count of matched data points"),
    ]
    for (short_name, long_name, text) in flags:
        output.add_argument(short_name, long_name, action="store_true",
                            help=text)
    return cmd
|  |  | ||||||
|  |  | ||||||
def cmd_extract_verify(self):
    """Check the parsed 'extract' arguments, reporting problems
    through self.parser.error(): start must not be after end, and
    --binary may not be combined with the other output options."""
    opts = self.args
    if opts.start > opts.end:
        self.parser.error("start is after end")

    if not opts.binary:
        return

    # Options that only make sense for text output
    text_only = ("bare", "annotate", "markup", "timestamp_raw", "count")
    if any(getattr(opts, name) for name in text_only):
        self.parser.error("--binary cannot be combined with other options")
|  |  | ||||||
|  |  | ||||||
def cmd_extract(self):
    """Extract data from the stream at self.args.path between
    self.args.start and self.args.end and write it to stdout.

    Output is raw bytes with --binary, otherwise one decoded line per
    item returned by the client.  With --annotate, header comments
    describing the request are printed first.  With --bare (and not
    --count), the first space-separated field (the timestamp) is
    removed from each data line.

    Returns 0 if anything was output, or 2 if the server returned no
    data.
    """
    streams = self.client.stream_list(self.args.path)
    if len(streams) != 1:
        self.die("error getting stream info for path %s", self.args.path)
    layout = streams[0][1]

    if self.args.annotate:
        # The timestamp formatter is only needed for these header lines
        if self.args.timestamp_raw:
            time_string = nilmdb.utils.time.timestamp_to_string
        else:
            time_string = nilmdb.utils.time.timestamp_to_human
        printf("# path: %s\n", self.args.path)
        printf("# layout: %s\n", layout)
        printf("# start: %s\n", time_string(self.args.start))
        printf("# end: %s\n", time_string(self.args.end))

    printed = False
    if self.args.binary:
        printer = sys.stdout.buffer.write
    else:
        def printer(line):
            print(line.decode('utf-8'))
    bare = self.args.bare
    count = self.args.count
    for dataline in self.client.stream_extract(self.args.path,
                                               self.args.start,
                                               self.args.end,
                                               self.args.count,
                                               self.args.markup,
                                               self.args.binary):
        # Strip timestamp (first element).  Doesn't make sense
        # if we are only returning a count.  Comment lines (such as
        # the interval markers requested with --markup, which are
        # presumably emitted as '#' comments -- TODO confirm against
        # the server) have no timestamp field and are left intact.
        if bare and not count and not dataline.startswith(b'#'):
            dataline = b' '.join(dataline.split(b' ')[1:])
        printer(dataline)
        printed = True
    if not printed:
        if self.args.annotate:
            printf("# no data\n")
        return 2

    return 0
							
								
								
									
										25
									
								
								nilmdb/cmdline/help.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								nilmdb/cmdline/help.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,25 @@ | |||||||
|  | import argparse | ||||||
|  |  | ||||||
|  |  | ||||||
def setup(self, sub):
    """Register the 'help' subcommand on the subparsers object 'sub'
    and return the new subparser.  The no_test_connect default tells
    Cmdline.run() to skip its test connection for this command."""
    description = """
                         Show help for a command. 'help command' is
                         the same as 'command --help'.
                         """
    cmd = sub.add_parser("help", help="Show detailed help for a command",
                         description=description)
    cmd.set_defaults(handler=cmd_help, no_test_connect=True)
    cmd.add_argument("command", nargs="?",
                     help="Command to get help about")
    # Accept, but don't document, anything after the command name
    cmd.add_argument("rest", nargs=argparse.REMAINDER,
                     help=argparse.SUPPRESS)
    return cmd
|  |  | ||||||
|  |  | ||||||
def cmd_help(self):
    """Print help for the subcommand named by self.args.command, or
    the top-level help if that is not a known subcommand."""
    name = self.args.command
    target = self.subcmd[name] if name in self.subcmd else self.parser
    target.print_help()
    return None
							
								
								
									
										30
									
								
								nilmdb/cmdline/info.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								nilmdb/cmdline/info.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,30 @@ | |||||||
|  | from argparse import ArgumentDefaultsHelpFormatter as def_form | ||||||
|  |  | ||||||
|  | import nilmdb.client | ||||||
|  | from nilmdb.utils.printf import printf | ||||||
|  | from nilmdb.utils import human_size | ||||||
|  |  | ||||||
|  |  | ||||||
def setup(self, sub):
    """Register the 'info' subcommand on the subparsers object 'sub'
    and return the new subparser."""
    description = """
                         List information about the server, like
                         version.
                         """
    cmd = sub.add_parser("info", help="Server information",
                         formatter_class=def_form,
                         description=description)
    cmd.set_defaults(handler=cmd_info)
    return cmd
|  |  | ||||||
|  |  | ||||||
def cmd_info(self):
    """Print the client version, server version, server URL, and the
    database path and disk space figures reported by the server."""
    printf("Client version: %s\n", nilmdb.__version__)
    printf("Server version: %s\n", self.client.version())
    printf("Server URL: %s\n", self.client.geturl())

    dbinfo = self.client.dbinfo()
    printf("Server database path: %s\n", dbinfo["path"])

    # (dbinfo key, human readable description) for each size figure
    space_fields = (
        ("size", "used by NilmDB"),
        ("other", "used by other"),
        ("reserved", "reserved"),
        ("free", "free"),
    )
    for (field, desc) in space_fields:
        amount = human_size(dbinfo[field])
        printf("Server disk space %s: %s\n", desc, amount)
							
								
								
									
										135
									
								
								nilmdb/cmdline/insert.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										135
									
								
								nilmdb/cmdline/insert.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,135 @@ | |||||||
|  | import sys | ||||||
|  |  | ||||||
|  | from nilmdb.utils.printf import printf | ||||||
|  | import nilmdb.client | ||||||
|  | import nilmdb.utils.timestamper as timestamper | ||||||
|  | import nilmdb.utils.time | ||||||
|  |  | ||||||
|  |  | ||||||
def setup(self, sub):
    """Register the "insert" subcommand on the subparsers object `sub`.

    `self` supplies the time parser used for --start/--end
    (`self.arg_time`) and the completer callbacks attached to the
    created arguments (`self.complete.rate`, `.time`, `.path`).

    Returns the subparser created for the command.
    """
    cmd = sub.add_parser("insert", help="Insert data",
                         description="""
                         Insert data into a stream.
                         """)
    cmd.set_defaults(verify=cmd_insert_verify,
                     handler=cmd_insert)
    cmd.add_argument("-q", "--quiet", action='store_true',
                     help='suppress unnecessary messages')

    # --- Timestamping options ---
    timestamping_text = """
                                   To add timestamps, specify the
                                   arguments --timestamp and --rate,
                                   and provide a starting time.
                                   """
    stamping = cmd.add_argument_group("Timestamping",
                                      description=timestamping_text)
    stamping.add_argument("-t", "--timestamp", action="store_true",
                          help="Add timestamps to each line")
    rate_opt = stamping.add_argument("-r", "--rate", type=float,
                                     help="Data rate, in Hz")
    rate_opt.completer = self.complete.rate

    # --- Start time: --start and --filename exclude each other ---
    start_text = """
                                   Start time may be manually
                                   specified with --start, or guessed
                                   from the filenames using
                                   --filename.  Set the TZ environment
                                   variable to change the default
                                   timezone."""
    start_group = cmd.add_argument_group("Start time",
                                         description=start_text)
    start_choice = start_group.add_mutually_exclusive_group()
    start_opt = start_choice.add_argument(
        "-s", "--start",
        metavar="TIME", type=self.arg_time,
        help="Starting timestamp (free-form)")
    start_opt.completer = self.complete.time
    start_choice.add_argument("-f", "--filename", action="store_true",
                              help="Use filename to determine start time")

    # --- End time ---
    end_text = """
                                   End time for the overall stream.
                                   (required when not using --timestamp).
                                   Set the TZ environment
                                   variable to change the default
                                   timezone."""
    end_group = cmd.add_argument_group("End time", description=end_text)
    end_opt = end_group.add_argument(
        "-e", "--end",
        metavar="TIME", type=self.arg_time,
        help="Ending timestamp (free-form)")
    end_opt.completer = self.complete.time

    # --- Positional parameters ---
    required = cmd.add_argument_group("Required parameters")
    path_arg = required.add_argument("path",
                                     help="Path of stream, e.g. /foo/bar")
    path_arg.completer = self.complete.path
    required.add_argument("file", nargs='?', default='-',
                          help="File to insert (default: - (stdin))")
    return cmd
|  |  | ||||||
|  |  | ||||||
def cmd_insert_verify(self):
    """Check that the parsed "insert" options form a usable combination.

    With --timestamp: --rate must be set to a non-zero value, and a
    start time must come from either --start or --filename.
    Without --timestamp: both --start and --end must be given.

    Every problem found is reported through self.die().
    """
    opts = self.args

    if not opts.timestamp:
        # Data already carries timestamps, so the caller must bound it
        if opts.start is None or opts.end is None:
            self.die("error: when not adding timestamps, --start and "
                     "--end are required")
        return

    if not opts.rate:
        self.die("error: --rate is needed, but was not specified")
    if opts.start is None and not opts.filename:
        self.die("error: need --start or --filename "
                 "when adding timestamps")
|  |  | ||||||
|  |  | ||||||
def cmd_insert(self):
    """Insert data from a file (or stdin) into one existing stream.

    The stream is looked up with self.client.stream_list(); anything
    other than exactly one match is reported through self.die().
    Input comes from self.args.file, with '-' meaning binary stdin.
    If no start time was given it is parsed from the filename.  With
    --timestamp the input is wrapped in a TimestamperRate; otherwise it
    is passed on in chunks of up to 1 MiB.

    Errors opening the file, parsing the start time, or raised by the
    client as nilmdb.client.Error are reported through self.die().
    A file opened here is always closed again before returning.
    """
    # Find requested stream
    streams = self.client.stream_list(self.args.path)
    if len(streams) != 1:
        self.die("error getting stream info for path %s", self.args.path)

    arg = self.args
    infile = None
    # Only close what we opened ourselves; stdin is left alone.
    close_infile = False

    try:
        filename = arg.file
        if filename == '-':
            infile = sys.stdin.buffer
        else:
            try:
                infile = open(filename, "rb")
            except IOError:
                self.die("error opening input file %s", filename)
            close_infile = True

        if arg.start is None:
            try:
                arg.start = nilmdb.utils.time.parse_time(filename)
            except ValueError:
                self.die("error extracting start time from filename '%s'",
                         filename)

        if arg.timestamp:
            data = timestamper.TimestamperRate(infile, arg.start, arg.rate)
        else:
            data = iter(lambda: infile.read(1048576), b'')

        # Print info
        if not arg.quiet:
            printf(" Input file: %s\n", filename)
            printf(" Start time: %s\n",
                   nilmdb.utils.time.timestamp_to_human(arg.start))
            # Compare against None: an end timestamp of 0 is still a
            # timestamp and should be shown.
            if arg.end is not None:
                printf("   End time: %s\n",
                       nilmdb.utils.time.timestamp_to_human(arg.end))
            if arg.timestamp:
                printf("Timestamper: %s\n", str(data))

        # Insert the data
        self.client.stream_insert(arg.path, data, arg.start, arg.end)

    except nilmdb.client.Error as e:
        # TODO: It would be nice to be able to offer better errors
        # here, particularly in the case of overlap, which just shows
        # ugly bracketed ranges of 16-digit numbers and a mangled URL.
        # Need to consider adding something like e.prettyprint()
        # that is smarter about the contents of the error.
        self.die("error inserting data: %s", str(e))

    finally:
        # Runs on success, on client errors, and when self.die() exits,
        # so the input file handle is never leaked.
        if close_infile and infile is not None:
            infile.close()

    return
							
								
								
									
										76
									
								
								nilmdb/cmdline/intervals.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								nilmdb/cmdline/intervals.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,76 @@ | |||||||
|  | from argparse import ArgumentDefaultsHelpFormatter as def_form | ||||||
|  |  | ||||||
|  | from nilmdb.utils.printf import printf | ||||||
|  | import nilmdb.utils.time | ||||||
|  | from nilmdb.utils.interval import Interval | ||||||
|  |  | ||||||
|  |  | ||||||
def setup(self, sub):
    """Register the "intervals" subcommand on the subparsers object `sub`.

    `self` supplies the time parser used for --start/--end
    (`self.arg_time`) and the completer callbacks attached to the
    created arguments (`self.complete.path`, `self.complete.time`).

    Returns the subparser created for the command.
    """
    cmd = sub.add_parser("intervals", help="List intervals",
                         formatter_class=def_form,
                         description="""
                         List intervals in a stream, similar to
                         'list --detail path'.

                         If '--diff diffpath' is provided, only
                         interval ranges that are present in 'path'
                         and not present in 'diffpath' are printed.
                         """)
    cmd.set_defaults(verify=cmd_intervals_verify,
                     handler=cmd_intervals)

    # Which stream(s) to look at
    selection = cmd.add_argument_group("Stream selection")
    path_arg = selection.add_argument("path", metavar="PATH",
                                      help="List intervals for this path")
    path_arg.completer = self.complete.path
    diff_opt = selection.add_argument(
        "-d", "--diff", metavar="PATH",
        help="Subtract intervals from this path")
    diff_opt.completer = self.complete.path

    # Optional time window
    details = cmd.add_argument_group("Interval details")
    start_opt = details.add_argument(
        "-s", "--start",
        metavar="TIME", type=self.arg_time,
        help="Starting timestamp for intervals (free-form, inclusive)")
    start_opt.completer = self.complete.time
    end_opt = details.add_argument(
        "-e", "--end",
        metavar="TIME", type=self.arg_time,
        help="Ending timestamp for intervals (free-form, noninclusive)")
    end_opt.completer = self.complete.time

    # Output tweaks
    misc = cmd.add_argument_group("Misc options")
    misc.add_argument("-T", "--timestamp-raw", action="store_true",
                      help="Show raw timestamps when printing times")
    misc.add_argument("-o", "--optimize", action="store_true",
                      help="Optimize (merge adjacent) intervals")

    return cmd
|  |  | ||||||
|  |  | ||||||
def cmd_intervals_verify(self):
    """Reject a time window whose start is not strictly before its end.

    Nothing is checked unless both --start and --end were given.  A bad
    window is reported through self.parser.error().
    """
    lower, upper = self.args.start, self.args.end
    if lower is None or upper is None:
        return
    if lower >= upper:
        self.parser.error("start must precede end")
|  |  | ||||||
|  |  | ||||||
def cmd_intervals(self):
    """List intervals in a stream

    Intervals come from self.client.stream_intervals() for the
    requested path, time window and optional --diff path.  With
    --optimize they are passed through nilmdb.utils.interval.optimize()
    first.  Each one is printed as "[ start -> end ]", with raw or
    human-readable times depending on --timestamp-raw.

    A nilmdb.client.ClientError is reported through self.die().
    """
    opts = self.args
    time_string = (nilmdb.utils.time.timestamp_to_string
                   if opts.timestamp_raw
                   else nilmdb.utils.time.timestamp_to_human)

    try:
        pairs = self.client.stream_intervals(opts.path,
                                             opts.start,
                                             opts.end,
                                             opts.diff)
        # Kept lazy: intervals are wrapped and printed as they arrive
        intervals = (Interval(begin, finish) for (begin, finish) in pairs)
        if opts.optimize:
            intervals = nilmdb.utils.interval.optimize(intervals)
        for span in intervals:
            printf("[ %s -> %s ]\n",
                   time_string(span.start), time_string(span.end))

    except nilmdb.client.ClientError as exc:
        self.die("error listing intervals: %s", str(exc))
							
								
								
									
										105
									
								
								nilmdb/cmdline/list.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										105
									
								
								nilmdb/cmdline/list.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,105 @@ | |||||||
|  | import fnmatch | ||||||
|  | from argparse import ArgumentDefaultsHelpFormatter as def_form | ||||||
|  |  | ||||||
|  | from nilmdb.utils.printf import printf | ||||||
|  | import nilmdb.utils.time | ||||||
|  |  | ||||||
|  |  | ||||||
def setup(self, sub):
    """Register the "list" subcommand on the subparsers object `sub`.

    `self` supplies the time parser used for --start/--end
    (`self.arg_time`) and the completer callbacks attached to the
    created arguments (`self.complete.path`, `self.complete.time`).

    Returns the subparser created for the command.
    """
    cmd = sub.add_parser("list", help="List streams",
                         formatter_class=def_form,
                         description="""
                         List streams available in the database,
                         optionally filtering by path.  Wildcards
                         are accepted; non-matching paths or wildcards
                         are ignored.
                         """)
    cmd.set_defaults(verify=cmd_list_verify,
                     handler=cmd_list)

    # Zero or more path patterns; defaults to matching everything
    filtering = cmd.add_argument_group("Stream filtering")
    path_arg = filtering.add_argument("path", metavar="PATH",
                                      default=["*"], nargs='*')
    path_arg.completer = self.complete.path

    info = cmd.add_argument_group("Interval info")
    info.add_argument("-E", "--ext", action="store_true",
                      help="Show extended stream info, like interval "
                      "extents and row count")

    # Per-interval listing and its optional time window
    details = cmd.add_argument_group("Interval details")
    details.add_argument("-d", "--detail", action="store_true",
                         help="Show available data time intervals")
    start_opt = details.add_argument(
        "-s", "--start",
        metavar="TIME", type=self.arg_time,
        help="Starting timestamp for intervals (free-form, inclusive)")
    start_opt.completer = self.complete.time
    end_opt = details.add_argument(
        "-e", "--end",
        metavar="TIME", type=self.arg_time,
        help="Ending timestamp for intervals (free-form, noninclusive)")
    end_opt.completer = self.complete.time

    misc = cmd.add_argument_group("Misc options")
    misc.add_argument("-T", "--timestamp-raw", action="store_true",
                      help="Show raw timestamps when printing times")
    misc.add_argument("-l", "--layout", action="store_true",
                      help="Show layout type next to path name")
    misc.add_argument("-n", "--no-decim", action="store_true",
                      help="Skip paths containing \"~decim-\"")

    return cmd
|  |  | ||||||
|  |  | ||||||
def cmd_list_verify(self):
    """Validate the time-window options of the "list" command.

    Two independent checks, each reported via self.parser.error():
    a start that does not strictly precede the end, and --start/--end
    given without --detail.
    """
    lower, upper = self.args.start, self.args.end
    have_lower = lower is not None
    have_upper = upper is not None

    if have_lower and have_upper and lower >= upper:
        self.parser.error("start must precede end")

    if (have_lower or have_upper) and not self.args.detail:
        self.parser.error("--start and --end only make sense "
                          "with --detail")
|  |  | ||||||
|  |  | ||||||
def cmd_list(self):
    """List available streams

    Fetches the extended stream list once, then for every path pattern
    in self.args.path prints each stream whose path matches it
    (fnmatch wildcards).  Streams containing "~decim-" are skipped
    with --no-decim.  Depending on the options, each path is followed
    by its layout (--layout), its interval extents and totals (--ext),
    and its individual intervals (--detail).
    """
    opts = self.args
    streams = self.client.stream_list(extended=True)

    time_string = (nilmdb.utils.time.timestamp_to_string
                   if opts.timestamp_raw
                   else nilmdb.utils.time.timestamp_to_human)

    for pattern in opts.path:
        for stream in streams:
            (path, layout, int_min, int_max, rows, time) = stream[:6]
            skip_decimated = opts.no_decim and "~decim-" in path
            if not fnmatch.fnmatch(path, pattern) or skip_decimated:
                continue

            # Heading line: the path, optionally with its layout
            if not opts.layout:
                printf("%s\n", path)
            else:
                printf("%s %s\n", path, layout)

            if opts.ext:
                if int_min is not None and int_max is not None:
                    printf("  interval extents: %s -> %s\n",
                           time_string(int_min), time_string(int_max))
                else:
                    printf("  interval extents: (no data)\n")
                printf("        total data: %d rows, %.6f seconds\n",
                       rows or 0,
                       nilmdb.utils.time.timestamp_to_seconds(time or 0))

            if opts.detail:
                shown = 0
                found = self.client.stream_intervals(
                    path, opts.start, opts.end)
                for (begin, finish) in found:
                    printf("  [ %s -> %s ]\n",
                           time_string(begin), time_string(finish))
                    shown += 1
                if shown == 0:
                    printf("  (no intervals)\n")
							
								
								
									
										90
									
								
								nilmdb/cmdline/metadata.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										90
									
								
								nilmdb/cmdline/metadata.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,90 @@ | |||||||
|  | from nilmdb.utils.printf import printf | ||||||
|  | import nilmdb | ||||||
|  | import nilmdb.client | ||||||
|  |  | ||||||
|  |  | ||||||
def setup(self, sub):
    """Register the "metadata" subcommand on the subparsers object `sub`.

    The four actions (--get, --set, --update, --delete) are placed in
    one mutually exclusive group.  `self` supplies the completer
    callbacks attached to the created arguments
    (`self.complete.path`, `.meta_key`, `.meta_keyval`).

    Returns the subparser created for the command.
    """
    cmd = sub.add_parser("metadata", help="Get or set stream metadata",
                         description="""
                         Get or set key=value metadata associated with
                         a stream.
                         """,
                         usage="%(prog)s path [-g [key ...] | "
                         "-s key=value [...] | -u key=value [...]] | "
                         "-d [key ...]")
    cmd.set_defaults(handler=cmd_metadata)

    required = cmd.add_argument_group("Required arguments")
    path_arg = required.add_argument("path",
                                     help="Path of stream, e.g. /foo/bar")
    path_arg.completer = self.complete.path

    actions = cmd.add_argument_group("Actions")
    only_one = actions.add_mutually_exclusive_group()

    get_opt = only_one.add_argument(
        "-g", "--get", nargs="*", metavar="key",
        help="Get metadata for specified keys (default all)")
    get_opt.completer = self.complete.meta_key

    set_opt = only_one.add_argument(
        "-s", "--set", nargs="+", metavar="key=value",
        help="Replace all metadata with provided key=value pairs")
    set_opt.completer = self.complete.meta_keyval

    update_opt = only_one.add_argument(
        "-u", "--update", nargs="+", metavar="key=value",
        help="Update metadata using provided key=value pairs")
    update_opt.completer = self.complete.meta_keyval

    delete_opt = only_one.add_argument(
        "-d", "--delete", nargs="*", metavar="key",
        help="Delete metadata for specified keys (default all)")
    delete_opt.completer = self.complete.meta_key

    return cmd
|  |  | ||||||
|  |  | ||||||
def cmd_metadata(self):
    """Manipulate metadata

    Exactly one action is carried out on the stream at self.args.path:

    --set / --update: parse each "key=value" argument and pass the
        resulting dict to stream_set_metadata / stream_update_metadata.
        Only the first '=' separates key from value, so values may
        themselves contain '='.  A missing '=' or an empty key is
        reported through self.die().
    --delete: fetch the selected keys (all when none are named) and
        update each of them to the empty string.
    --get (or nothing): print the selected keys (all when none are
        named) as sorted "key=value" lines.

    A nilmdb.client.ClientError is reported through self.die().
    """
    if self.args.set is not None or self.args.update is not None:
        # Either set, or update
        if self.args.set is not None:
            keyvals = self.args.set
            handler = self.client.stream_set_metadata
        else:
            keyvals = self.args.update
            handler = self.client.stream_update_metadata

        # Extract key=value pairs
        data = {}
        for keyval in keyvals:
            # Split on the first '=' only; anything after it, including
            # further '=' characters, belongs to the value.
            key, sep, value = keyval.partition('=')
            if not sep or key == "":
                self.die("error parsing key=value argument '%s'", keyval)
            data[key] = value

        # Make the call
        try:
            handler(self.args.path, data)
        except nilmdb.client.ClientError as e:
            self.die("error setting/updating metadata: %s", str(e))
    elif self.args.delete is not None:
        # Delete (by setting values to empty strings)
        keys = None
        if self.args.delete:
            keys = list(self.args.delete)
        try:
            data = self.client.stream_get_metadata(self.args.path, keys)
            for key in data:
                data[key] = ""
            self.client.stream_update_metadata(self.args.path, data)
        except nilmdb.client.ClientError as e:
            self.die("error deleting metadata: %s", str(e))
    else:
        # Get (or unspecified)
        keys = None
        if self.args.get:
            keys = list(self.args.get)
        try:
            data = self.client.stream_get_metadata(self.args.path, keys)
        except nilmdb.client.ClientError as e:
            self.die("error getting metadata: %s", str(e))
        for key, value in sorted(data.items()):
            # Print nonexistent keys as having empty value
            if value is None:
                value = ""
            printf("%s=%s\n", key, value)
							
								
								
									
										59
									
								
								nilmdb/cmdline/remove.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								nilmdb/cmdline/remove.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,59 @@ | |||||||
|  | import fnmatch | ||||||
|  |  | ||||||
|  | from nilmdb.utils.printf import printf | ||||||
|  | import nilmdb.client | ||||||
|  |  | ||||||
|  |  | ||||||
def setup(self, sub):
    """Register the "remove" subcommand on the subparsers object `sub`.

    --start and --end are both mandatory.  `self` supplies the time
    parser used for them (`self.arg_time`) and the completer callbacks
    attached to the created arguments (`self.complete.path`,
    `self.complete.time`).

    Returns the subparser created for the command.
    """
    cmd = sub.add_parser("remove", help="Remove data",
                         description="""
                         Remove all data from a specified time range within a
                         stream.  If multiple streams or wildcards are
                         provided, the same time range is removed from all
                         streams.
                         """)
    cmd.set_defaults(handler=cmd_remove)

    # What to remove: one or more paths plus a required time range
    selection = cmd.add_argument_group("Data selection")
    path_arg = selection.add_argument(
        "path", nargs='+',
        help="Path of stream, e.g. /foo/bar/*")
    path_arg.completer = self.complete.path
    start_opt = selection.add_argument(
        "-s", "--start", required=True,
        metavar="TIME", type=self.arg_time,
        help="Starting timestamp (free-form, inclusive)")
    start_opt.completer = self.complete.time
    end_opt = selection.add_argument(
        "-e", "--end", required=True,
        metavar="TIME", type=self.arg_time,
        help="Ending timestamp (free-form, noninclusive)")
    end_opt.completer = self.complete.time

    output = cmd.add_argument_group("Output format")
    output.add_argument("-q", "--quiet", action="store_true",
                        help="Don't display names when removing "
                        "from multiple paths")
    output.add_argument("-c", "--count", action="store_true",
                        help="Output number of data points removed")
    return cmd
|  |  | ||||||
|  |  | ||||||
def cmd_remove(self):
    """Remove the requested time range from every matching stream.

    Each pattern in self.args.path is expanded against the stream list
    with fnmatch; a pattern that matches nothing is reported through
    self.die().  The range [start, end) is then removed from each
    matched path in order.  Unless --quiet is set, the path is
    announced when more than one path is being processed; with --count
    the value returned by stream_remove() is printed per path.

    A nilmdb.client.ClientError is reported through self.die().
    Returns 0.
    """
    known = [entry[0] for entry in self.client.stream_list()]

    # Expand wildcards, keeping the order given on the command line
    targets = []
    for pattern in self.args.path:
        matched = fnmatch.filter(known, pattern)
        if len(matched) == 0:
            self.die("error: no stream matched path: %s", pattern)
        targets.extend(matched)

    try:
        for target in targets:
            announce = not self.args.quiet and len(targets) > 1
            if announce:
                printf("Removing from %s\n", target)
            removed = self.client.stream_remove(
                target, self.args.start, self.args.end)
            if self.args.count:
                printf("%d\n", removed)
    except nilmdb.client.ClientError as exc:
        self.die("error removing data: %s", str(exc))

    return 0
							
								
								
									
										32
									
								
								nilmdb/cmdline/rename.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								nilmdb/cmdline/rename.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,32 @@ | |||||||
|  | from argparse import ArgumentDefaultsHelpFormatter as def_form | ||||||
|  |  | ||||||
|  | import nilmdb.client | ||||||
|  |  | ||||||
|  |  | ||||||
def setup(self, sub):
    """Register the "rename" subcommand on the subparsers object `sub`.

    Adds the two positional arguments "oldpath" and "newpath", each
    with `self.complete.path` attached as its completer.

    Returns the subparser created for the command.
    """
    cmd = sub.add_parser("rename", help="Rename a stream",
                         formatter_class=def_form,
                         description="""
                         Rename a stream.

                         Only the stream's path is renamed; no
                         metadata is changed.
                         """)
    cmd.set_defaults(handler=cmd_rename)

    required = cmd.add_argument_group("Required arguments")
    old_arg = required.add_argument("oldpath",
                                    help="Old path, e.g. /foo/old")
    old_arg.completer = self.complete.path
    new_arg = required.add_argument("newpath",
                                    help="New path, e.g. /foo/bar/new")
    new_arg.completer = self.complete.path

    return cmd
|  |  | ||||||
|  |  | ||||||
def cmd_rename(self):
    """Rename a stream

    Asks the client to rename self.args.oldpath to self.args.newpath.
    A nilmdb.client.ClientError is reported through self.die().
    """
    source, target = self.args.oldpath, self.args.newpath
    try:
        self.client.stream_rename(source, target)
    except nilmdb.client.ClientError as exc:
        self.die("error renaming stream: %s", str(exc))
| @@ -1,37 +0,0 @@ | |||||||
| """FileInterval |  | ||||||
|  |  | ||||||
| An Interval that is backed with file data storage""" |  | ||||||
|  |  | ||||||
| from nilmdb.interval import Interval, IntervalSet, IntervalError |  | ||||||
| from datetime import datetime |  | ||||||
| import bisect |  | ||||||
|  |  | ||||||
class FileInterval(Interval):
    """Represents an interval of time and its corresponding data

    The data lives in the file `filename`, between the positions
    `start_offset` and `end_offset`.
    """

    def __init__(self, start, end,
                 filename,
                 start_offset=None, end_offset=None):
        """Create an interval from `start` to `end` backed by `filename`.

        `start_offset` defaults to 0.  `end_offset` defaults to the
        current size of the file, which is opened briefly to find it,
        so a missing or unreadable file raises the error from open().
        """
        # Local import: `os` is needed for SEEK_END but is not imported
        # at the top of this module.
        import os

        self.start = start
        self.end = end
        self.filename = filename
        if start_offset is None:
            start_offset = 0
        self.start_offset = start_offset
        if end_offset is None:
            # Seek to the end to learn the size; the `with` block makes
            # sure the handle is closed again.
            with open(filename, 'rb') as f:
                f.seek(0, os.SEEK_END)
                end_offset = f.tell()
        self.end_offset = end_offset

    def __setattr__(self, name, value):
        # Store the value directly on the instance.  The previous no-op
        # silently discarded every assignment, including those made in
        # __init__, leaving the object without any attributes.
        # NOTE(review): this bypasses whatever Interval.__setattr__
        # does (if it defines one) -- confirm that is acceptable.
        self.__dict__[name] = value

    def subset(self, start, end):
        """Return a new Interval that is a subset of this one

        Raises IntervalError if [start, end] is not contained in this
        interval.
        """
        # TODO: Any magic regarding file/offset/length mapping for subsets
        if (start < self.start or end > self.end):
            raise IntervalError("not a subset")
        # `filename` is a required constructor argument.  The offsets
        # are carried over unchanged until the TODO above is resolved,
        # so the subset still refers to the parent's whole file region.
        return FileInterval(start, end, self.filename,
                            self.start_offset, self.end_offset)
|  |  | ||||||
|      |  | ||||||
							
								
								
									
										3
									
								
								nilmdb/fsck/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								nilmdb/fsck/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | |||||||
|  | """nilmdb.fsck""" | ||||||
|  |  | ||||||
|  | from nilmdb.fsck.fsck import Fsck | ||||||
							
								
								
									
										544
									
								
								nilmdb/fsck/fsck.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										544
									
								
								nilmdb/fsck/fsck.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,544 @@ | |||||||
|  | # -*- coding: utf-8 -*- | ||||||
|  |  | ||||||
|  | """Check database consistency, with some ability to fix problems. | ||||||
|  | This should be able to fix cases where a database gets corrupted due | ||||||
|  | to unexpected system shutdown, and detect other cases that may cause | ||||||
|  | NilmDB to return errors when trying to manipulate the database.""" | ||||||
|  |  | ||||||
|  | import nilmdb.utils | ||||||
|  | import nilmdb.server | ||||||
|  | import nilmdb.client.numpyclient | ||||||
|  | from nilmdb.utils.interval import IntervalError | ||||||
|  | from nilmdb.server.interval import Interval, IntervalSet | ||||||
|  | from nilmdb.utils.printf import printf, fprintf, sprintf | ||||||
|  |  | ||||||
|  | from collections import defaultdict | ||||||
|  | import sqlite3 | ||||||
|  | import os | ||||||
|  | import sys | ||||||
|  | import progressbar | ||||||
|  | import re | ||||||
|  | import shutil | ||||||
|  | import pickle | ||||||
|  | import numpy | ||||||
|  |  | ||||||
|  |  | ||||||
class FsckError(Exception):
    """Base class for the fsck errors defined in this module.

    When extra positional arguments are given, `msg` is treated as a
    format string and expanded with sprintf(msg, *args); otherwise it
    is used verbatim as the exception message.
    """

    def __init__(self, msg="", *args):
        text = sprintf(msg, *args) if args else msg
        super().__init__(text)
|  |  | ||||||
|  |  | ||||||
class FixableFsckError(FsckError):
    """FsckError whose message is extended with a hint about "--fix"."""

    def __init__(self, msg=""):
        hinted = '{}\nThis may be fixable with "--fix".'.format(msg)
        FsckError.__init__(self, hinted)
|  |  | ||||||
|  |  | ||||||
class RetryFsck(FsckError):
    """FsckError subclass with no behavior of its own.

    NOTE(review): presumably raised to ask for a check to be run
    again -- confirm against the code that raises and catches it.
    """
|  |  | ||||||
|  |  | ||||||
class FsckFormatError(FsckError):
    """FsckError subclass with no behavior of its own.

    NOTE(review): presumably signals data in an unexpected format --
    confirm against the code that raises it.
    """
|  |  | ||||||
|  |  | ||||||
def log(format, *args):
    """Emit a printf-style message by handing it straight to printf()."""
    printf(format, *args)
|  |  | ||||||
|  |  | ||||||
def err(format, *args):
    """Emit a printf-style message on sys.stderr via fprintf()."""
    fprintf(sys.stderr, format, *args)
|  |  | ||||||
|  |  | ||||||
|  | # Decorator that retries a function if it returns a specific value | ||||||
|  | def retry_if_raised(exc, message=None, max_retries=100): | ||||||
|  |     def f1(func): | ||||||
|  |         def f2(*args, **kwargs): | ||||||
|  |             for n in range(max_retries): | ||||||
|  |                 try: | ||||||
|  |                     return func(*args, **kwargs) | ||||||
|  |                 except exc: | ||||||
|  |                     if message: | ||||||
|  |                         log("%s\n\n", message) | ||||||
|  |             raise Exception("Max number of retries (%d) exceeded; giving up" % | ||||||
|  |                             max_retries) | ||||||
|  |         return f2 | ||||||
|  |     return f1 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class Progress(object): | ||||||
|  |     def __init__(self, maxval): | ||||||
|  |         if maxval == 0: | ||||||
|  |             maxval = 1 | ||||||
|  |         self.bar = progressbar.ProgressBar( | ||||||
|  |             maxval=maxval, | ||||||
|  |             widgets=[progressbar.Percentage(), ' ', | ||||||
|  |                      progressbar.Bar(), ' ', | ||||||
|  |                      progressbar.ETA()]) | ||||||
|  |         self.bar.term_width = self.bar.term_width or 75 | ||||||
|  |  | ||||||
|  |     def __enter__(self): | ||||||
|  |         self.bar.start() | ||||||
|  |         self.last_update = 0 | ||||||
|  |         return self | ||||||
|  |  | ||||||
|  |     def __exit__(self, exc_type, exc_value, traceback): | ||||||
|  |         if exc_type is None: | ||||||
|  |             self.bar.finish() | ||||||
|  |         else: | ||||||
|  |             printf("\n") | ||||||
|  |  | ||||||
|  |     def update(self, val): | ||||||
|  |         self.bar.update(val) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class Fsck(object): | ||||||
|  |     def __init__(self, path, fix=False): | ||||||
|  |         self.basepath = path | ||||||
|  |         self.sqlpath = os.path.join(path, "data.sql") | ||||||
|  |         self.bulkpath = os.path.join(path, "data") | ||||||
|  |         self.bulklock = os.path.join(path, "data.lock") | ||||||
|  |         self.fix = fix | ||||||
|  |  | ||||||
|  |     ### Main checks | ||||||
|  |  | ||||||
|  |     @retry_if_raised(RetryFsck, "Something was fixed: restarting fsck") | ||||||
|  |     def check(self, skip_data=False): | ||||||
|  |         """Run every consistency check in order. | ||||||
|  |  | ||||||
|  |         skip_data: when true, the per-row data check is skipped. | ||||||
|  |  | ||||||
|  |         Whenever a fix_* method raises RetryFsck, the decorator logs | ||||||
|  |         "Something was fixed: restarting fsck" and calls this method | ||||||
|  |         again from the start.  Other FsckError exceptions propagate to | ||||||
|  |         the caller.""" | ||||||
|  |         # Reset the handles so the cleanup below only touches what this | ||||||
|  |         # attempt actually opened. | ||||||
|  |         self.bulk = None | ||||||
|  |         self.sql = None | ||||||
|  |         try: | ||||||
|  |             self.check_paths() | ||||||
|  |             self.check_sql() | ||||||
|  |             self.check_streams() | ||||||
|  |             self.check_intervals() | ||||||
|  |             if skip_data: | ||||||
|  |                 log("skipped data check\n") | ||||||
|  |             else: | ||||||
|  |                 self.check_data() | ||||||
|  |         finally: | ||||||
|  |             # Runs on success, on error and before every retry. | ||||||
|  |             if self.bulk: | ||||||
|  |                 self.bulk.close() | ||||||
|  |             if self.sql:  # pragma: no cover | ||||||
|  |                 # (coverage doesn't handle finally clauses correctly; | ||||||
|  |                 # both branches here are tested) | ||||||
|  |                 self.sql.commit() | ||||||
|  |                 self.sql.close() | ||||||
|  |         log("ok\n") | ||||||
|  |  | ||||||
|  |     ### Check basic path structure | ||||||
|  |  | ||||||
|  |     def check_paths(self): | ||||||
|  |         log("checking paths\n") | ||||||
|  |         if self.bulk: | ||||||
|  |             self.bulk.close() | ||||||
|  |         if not os.path.isfile(self.sqlpath): | ||||||
|  |             raise FsckError("SQL database missing (%s)", self.sqlpath) | ||||||
|  |         if not os.path.isdir(self.bulkpath): | ||||||
|  |             raise FsckError("Bulk data directory missing (%s)", self.bulkpath) | ||||||
|  |         with open(self.bulklock, "w") as lockfile: | ||||||
|  |             if not nilmdb.utils.lock.exclusive_lock(lockfile): | ||||||
|  |                 raise FsckError('Database already locked by another process\n' | ||||||
|  |                                 'Make sure all other processes that might be ' | ||||||
|  |                                 'using the database are stopped.\n' | ||||||
|  |                                 'Restarting apache will cause it to unlock ' | ||||||
|  |                                 'the db until a request is received.') | ||||||
|  |             # unlocked immediately | ||||||
|  |         self.bulk = nilmdb.server.bulkdata.BulkData(self.basepath) | ||||||
|  |  | ||||||
|  |     ### Check SQL database health | ||||||
|  |  | ||||||
|  |     def check_sql(self): | ||||||
|  |         log("checking sqlite database\n") | ||||||
|  |  | ||||||
|  |         self.sql = sqlite3.connect(self.sqlpath) | ||||||
|  |         with self.sql: | ||||||
|  |             cur = self.sql.cursor() | ||||||
|  |             ver = cur.execute("PRAGMA user_version").fetchone()[0] | ||||||
|  |             good = max(nilmdb.server.nilmdb._sql_schema_updates.keys()) | ||||||
|  |             if ver != good: | ||||||
|  |                 raise FsckError("database version %d too old, should be %d", | ||||||
|  |                                 ver, good) | ||||||
|  |             self.stream_path = {} | ||||||
|  |             self.stream_layout = {} | ||||||
|  |             log("  loading paths\n") | ||||||
|  |             result = cur.execute("SELECT id, path, layout FROM streams") | ||||||
|  |             for r in result: | ||||||
|  |                 if r[0] in self.stream_path: | ||||||
|  |                     raise FsckError("duplicated ID %d in stream IDs", r[0]) | ||||||
|  |                 self.stream_path[r[0]] = r[1] | ||||||
|  |                 self.stream_layout[r[0]] = r[2] | ||||||
|  |  | ||||||
|  |             log("  loading intervals\n") | ||||||
|  |             self.stream_interval = defaultdict(list) | ||||||
|  |             result = cur.execute("SELECT stream_id, start_time, end_time, " | ||||||
|  |                                  "start_pos, end_pos FROM ranges " | ||||||
|  |                                  "ORDER BY start_time") | ||||||
|  |             for r in result: | ||||||
|  |                 if r[0] not in self.stream_path: | ||||||
|  |                     raise FsckError("interval ID %d not in streams", r[0]) | ||||||
|  |                 self.stream_interval[r[0]].append((r[1], r[2], r[3], r[4])) | ||||||
|  |  | ||||||
|  |             log("  loading metadata\n") | ||||||
|  |             self.stream_meta = defaultdict(dict) | ||||||
|  |             result = cur.execute("SELECT stream_id, key, value FROM metadata") | ||||||
|  |             for r in result: | ||||||
|  |                 if r[0] not in self.stream_path: | ||||||
|  |                     raise FsckError("metadata ID %d not in streams", r[0]) | ||||||
|  |                 if r[1] in self.stream_meta[r[0]]: | ||||||
|  |                     raise FsckError( | ||||||
|  |                         "duplicate metadata key '%s' for stream %d", | ||||||
|  |                         r[1], r[0]) | ||||||
|  |                 self.stream_meta[r[0]][r[1]] = r[2] | ||||||
|  |  | ||||||
|  |     ### Check streams and basic interval overlap | ||||||
|  |  | ||||||
|  |     def check_streams(self): | ||||||
|  |         ids = list(self.stream_path.keys()) | ||||||
|  |         log("checking %s streams\n", "{:,d}".format(len(ids))) | ||||||
|  |         with Progress(len(ids)) as pbar: | ||||||
|  |             for i, sid in enumerate(ids): | ||||||
|  |                 pbar.update(i) | ||||||
|  |                 path = self.stream_path[sid] | ||||||
|  |  | ||||||
|  |                 # unique path, valid layout | ||||||
|  |                 if list(self.stream_path.values()).count(path) != 1: | ||||||
|  |                     raise FsckError("duplicated path %s", path) | ||||||
|  |                 layout = self.stream_layout[sid].split('_')[0] | ||||||
|  |                 if layout not in ('int8', 'int16', 'int32', 'int64', | ||||||
|  |                                   'uint8', 'uint16', 'uint32', 'uint64', | ||||||
|  |                                   'float32', 'float64'): | ||||||
|  |                     raise FsckError("bad layout %s for %s", layout, path) | ||||||
|  |                 count = int(self.stream_layout[sid].split('_')[1]) | ||||||
|  |                 if count < 1 or count > 1024: | ||||||
|  |                     raise FsckError("bad count %d for %s", count, path) | ||||||
|  |  | ||||||
|  |                 # must exist in bulkdata | ||||||
|  |                 bulk = self.bulkpath + path | ||||||
|  |                 bulk = bulk.encode('utf-8') | ||||||
|  |                 if not os.path.isdir(bulk): | ||||||
|  |                     raise FsckError("%s: missing bulkdata dir", path) | ||||||
|  |                 if not nilmdb.server.bulkdata.Table.exists(bulk): | ||||||
|  |                     raise FsckError("%s: bad bulkdata table", path) | ||||||
|  |  | ||||||
|  |                 # intervals don't overlap.  Abuse IntervalSet to check | ||||||
|  |                 # for intervals in file positions, too. | ||||||
|  |                 timeiset = IntervalSet() | ||||||
|  |                 posiset = IntervalSet() | ||||||
|  |                 for (stime, etime, spos, epos) in self.stream_interval[sid]: | ||||||
|  |                     new = Interval(stime, etime) | ||||||
|  |                     try: | ||||||
|  |                         timeiset += new | ||||||
|  |                     except IntervalError: | ||||||
|  |                         raise FsckError("%s: overlap in intervals:\n" | ||||||
|  |                                         "set: %s\nnew: %s", | ||||||
|  |                                         path, str(timeiset), str(new)) | ||||||
|  |                     if spos != epos: | ||||||
|  |                         new = Interval(spos, epos) | ||||||
|  |                         try: | ||||||
|  |                             posiset += new | ||||||
|  |                         except IntervalError: | ||||||
|  |                             raise FsckError("%s: overlap in file offsets:\n" | ||||||
|  |                                             "set: %s\nnew: %s", | ||||||
|  |                                             path, str(posiset), str(new)) | ||||||
|  |  | ||||||
|  |                 try: | ||||||
|  |                     # Check bulkdata | ||||||
|  |                     self.check_bulkdata(sid, path, bulk) | ||||||
|  |  | ||||||
|  |                     # Check that we can open bulkdata | ||||||
|  |                     tab = nilmdb.server.bulkdata.Table(bulk) | ||||||
|  |                 except FsckFormatError as e: | ||||||
|  |                     # If there are no files except _format, try deleting | ||||||
|  |                     # the entire stream; this may remove metadata, but | ||||||
|  |                     # it's probably unimportant. | ||||||
|  |                     files = list(os.listdir(bulk)) | ||||||
|  |                     if len(files) > 1: | ||||||
|  |                         raise FsckFormatError(f"{path}: can't load _format, " | ||||||
|  |                                               f"but data is also present") | ||||||
|  |  | ||||||
|  |                     # Since the stream was empty, just remove it | ||||||
|  |                     self.fix_remove_stream(sid, path, bulk, | ||||||
|  |                                            "empty, with corrupted format file") | ||||||
|  |                 except FsckError as e: | ||||||
|  |                     raise e | ||||||
|  |                 except Exception as e: # pragma: no cover | ||||||
|  |                     # No coverage because this is an unknown/unexpected error | ||||||
|  |                     raise FsckError("%s: can't open bulkdata: %s", | ||||||
|  |                                     path, str(e)) | ||||||
|  |                 tab.close() | ||||||
|  |  | ||||||
|  |     ### Check that bulkdata is good enough to be opened | ||||||
|  |  | ||||||
|  |     @retry_if_raised(RetryFsck) | ||||||
|  |     def check_bulkdata(self, sid, path, bulk): | ||||||
|  |         try: | ||||||
|  |             with open(os.path.join(bulk, b"_format"), "rb") as f: | ||||||
|  |                 fmt = pickle.load(f) | ||||||
|  |         except Exception as e: | ||||||
|  |             raise FsckFormatError(f"{path}: can't load _format file ({e})") | ||||||
|  |  | ||||||
|  |         if fmt["version"] != 3: | ||||||
|  |             raise FsckFormatError("%s: bad or unsupported bulkdata version %d", | ||||||
|  |                                   path, fmt["version"]) | ||||||
|  |         rows_per_file = int(fmt["rows_per_file"]) | ||||||
|  |         if rows_per_file < 1: | ||||||
|  |             raise FsckFormatError(f"{path}: bad rows_per_file {rows_per_file}") | ||||||
|  |         files_per_dir = int(fmt["files_per_dir"]) | ||||||
|  |         if files_per_dir < 1: | ||||||
|  |             raise FsckFormatError(f"{path}: bad files_per_dir {files_per_dir}") | ||||||
|  |         layout = fmt["layout"] | ||||||
|  |         if layout != self.stream_layout[sid]: | ||||||
|  |             raise FsckFormatError("%s: layout mismatch %s != %s", path, | ||||||
|  |                                   layout, self.stream_layout[sid]) | ||||||
|  |  | ||||||
|  |         # Every file should have a size that's the multiple of the row size | ||||||
|  |         rkt = nilmdb.server.rocket.Rocket(layout, None) | ||||||
|  |         row_size = rkt.binary_size | ||||||
|  |         rkt.close() | ||||||
|  |  | ||||||
|  |         # Find all directories | ||||||
|  |         regex = re.compile(b"^[0-9a-f]{4,}$") | ||||||
|  |         subdirs = sorted(filter(regex.search, os.listdir(bulk)), | ||||||
|  |                          key=lambda x: int(x, 16), reverse=True) | ||||||
|  |         for subdir in subdirs: | ||||||
|  |             # Find all files in that dir | ||||||
|  |             subpath = os.path.join(bulk, subdir) | ||||||
|  |             files = list(filter(regex.search, os.listdir(subpath))) | ||||||
|  |             if not files: | ||||||
|  |                 self.fix_empty_subdir(subpath) | ||||||
|  |  | ||||||
|  |             # Verify that their size is a multiple of the row size | ||||||
|  |             for filename in files: | ||||||
|  |                 filepath = os.path.join(subpath, filename) | ||||||
|  |                 offset = os.path.getsize(filepath) | ||||||
|  |                 if offset % row_size: | ||||||
|  |                     self.fix_bad_filesize(path, filepath, offset, row_size) | ||||||
|  |  | ||||||
|  |     def fix_empty_subdir(self, subpath): | ||||||
|  |         msg = sprintf("bulkdata path %s is missing data files", subpath) | ||||||
|  |         if not self.fix: | ||||||
|  |             raise FixableFsckError(msg) | ||||||
|  |         # Try to fix it by just deleting whatever is present, | ||||||
|  |         # as long as it's only ".removed" files. | ||||||
|  |         err("\n%s\n", msg) | ||||||
|  |         for fn in os.listdir(subpath): | ||||||
|  |             if not fn.endswith(b".removed"): | ||||||
|  |                 raise FsckError("can't fix automatically: please manually " | ||||||
|  |                                 "remove the file '%s' and try again", | ||||||
|  |                                 os.path.join(subpath, fn).decode( | ||||||
|  |                                     'utf-8', errors='backslashreplace')) | ||||||
|  |         # Remove the whole thing | ||||||
|  |         err("Removing empty subpath\n") | ||||||
|  |         shutil.rmtree(subpath) | ||||||
|  |         raise RetryFsck | ||||||
|  |  | ||||||
|  |     def fix_bad_filesize(self, path, filepath, offset, row_size): | ||||||
|  |         extra = offset % row_size | ||||||
|  |         msg = sprintf("%s: size of file %s (%d) is not a multiple" + | ||||||
|  |                       " of row size (%d): %d extra bytes present", | ||||||
|  |                       path, filepath, offset, row_size, extra) | ||||||
|  |         if not self.fix: | ||||||
|  |             raise FixableFsckError(msg) | ||||||
|  |         # Try to fix it by just truncating the file | ||||||
|  |         err("\n%s\n", msg) | ||||||
|  |         newsize = offset - extra | ||||||
|  |         err("Truncating file to %d bytes and retrying\n", newsize) | ||||||
|  |         with open(filepath, "r+b") as f: | ||||||
|  |             f.truncate(newsize) | ||||||
|  |             raise RetryFsck | ||||||
|  |  | ||||||
|  |     def fix_remove_stream(self, sid, path, bulk, reason): | ||||||
|  |         msg = f"stream {path} is corrupted: {reason}" | ||||||
|  |         if not self.fix: | ||||||
|  |             raise FixableFsckError(msg) | ||||||
|  |         # Remove the stream from disk and the database | ||||||
|  |         err(f"\n{msg}\n") | ||||||
|  |         err(f"Removing stream {path} from disk and database\n") | ||||||
|  |         shutil.rmtree(bulk) | ||||||
|  |         with self.sql: | ||||||
|  |             cur = self.sql.cursor() | ||||||
|  |             cur.execute("DELETE FROM streams WHERE id=?", | ||||||
|  |                         (sid,)) | ||||||
|  |             if cur.rowcount != 1:  # pragma: no cover (shouldn't fail) | ||||||
|  |                 raise FsckError("failed to remove stream") | ||||||
|  |             cur.execute("DELETE FROM ranges WHERE stream_id=?", (sid,)) | ||||||
|  |             cur.execute("DELETE FROM metadata WHERE stream_id=?", (sid,)) | ||||||
|  |         raise RetryFsck | ||||||
|  |  | ||||||
|  |     ### Check interval endpoints | ||||||
|  |  | ||||||
|  |     def check_intervals(self): | ||||||
|  |         total_ints = sum(len(x) for x in list(self.stream_interval.values())) | ||||||
|  |         log("checking %s intervals\n", "{:,d}".format(total_ints)) | ||||||
|  |         done = 0 | ||||||
|  |         with Progress(total_ints) as pbar: | ||||||
|  |             for sid in self.stream_interval: | ||||||
|  |                 try: | ||||||
|  |                     bulk = self.bulkpath + self.stream_path[sid] | ||||||
|  |                     bulk = bulk.encode('utf-8') | ||||||
|  |                     tab = nilmdb.server.bulkdata.Table(bulk) | ||||||
|  |  | ||||||
|  |                     def update(x): | ||||||
|  |                         pbar.update(done + x) | ||||||
|  |  | ||||||
|  |                     ints = self.stream_interval[sid] | ||||||
|  |                     done += self.check_table_intervals(sid, ints, tab, update) | ||||||
|  |                 finally: | ||||||
|  |                     tab.close() | ||||||
|  |  | ||||||
|  |     def check_table_intervals(self, sid, ints, tab, update): | ||||||
|  |         # look in the table to make sure we can pick out the interval's | ||||||
|  |         # endpoints | ||||||
|  |         path = self.stream_path[sid]  # noqa: F841 unused | ||||||
|  |         tab.file_open.cache_remove_all() | ||||||
|  |         for (i, intv) in enumerate(ints): | ||||||
|  |             update(i) | ||||||
|  |             (stime, etime, spos, epos) = intv | ||||||
|  |             if spos == epos and spos >= 0 and spos <= tab.nrows: | ||||||
|  |                 continue | ||||||
|  |             try: | ||||||
|  |                 srow = tab[spos]    # noqa: F841 unused | ||||||
|  |                 erow = tab[epos-1]  # noqa: F841 unused | ||||||
|  |             except Exception as e: | ||||||
|  |                 self.fix_bad_interval(sid, intv, tab, str(e)) | ||||||
|  |  | ||||||
|  |         return len(ints) | ||||||
|  |  | ||||||
|  |     def fix_bad_interval(self, sid, intv, tab, msg): | ||||||
|  |         """Repair or drop an interval whose rows could not be read. | ||||||
|  |  | ||||||
|  |         sid:  stream id the interval belongs to. | ||||||
|  |         intv: (start_time, end_time, start_pos, end_pos) tuple. | ||||||
|  |         tab:  open bulkdata table of the stream. | ||||||
|  |         msg:  text of the error seen while reading the rows. | ||||||
|  |  | ||||||
|  |         Without --fix this raises FixableFsckError.  With --fix the | ||||||
|  |         interval is either shortened to the rows actually present or | ||||||
|  |         deleted from SQL, and RetryFsck is raised.  This method never | ||||||
|  |         returns normally.""" | ||||||
|  |         path = self.stream_path[sid] | ||||||
|  |         msg = sprintf("%s: interval %s error accessing rows: %s", | ||||||
|  |                       path, str(intv), str(msg)) | ||||||
|  |         if not self.fix: | ||||||
|  |             raise FixableFsckError(msg) | ||||||
|  |         err("\n%s\n", msg) | ||||||
|  |  | ||||||
|  |         (stime, etime, spos, epos) = intv | ||||||
|  |         # If it's just that the end pos is more than the number of rows | ||||||
|  |         # in the table, lower end pos and truncate interval time too. | ||||||
|  |         if spos < tab.nrows and epos >= tab.nrows: | ||||||
|  |             err("end position is past endrows, but it can be truncated\n") | ||||||
|  |             err("old end: time %d, pos %d\n", etime, epos) | ||||||
|  |             new_epos = tab.nrows | ||||||
|  |             # New end time is one past the value read from the last row | ||||||
|  |             # (presumably tab[n] yields that row's timestamp -- confirm | ||||||
|  |             # against the bulkdata Table implementation). | ||||||
|  |             new_etime = tab[new_epos-1] + 1 | ||||||
|  |             err("new end: time %d, pos %d\n", new_etime, new_epos) | ||||||
|  |             if stime < new_etime: | ||||||
|  |                 # Change it in SQL | ||||||
|  |                 with self.sql: | ||||||
|  |                     cur = self.sql.cursor() | ||||||
|  |                     # Match on all five columns so exactly this range | ||||||
|  |                     # row is updated. | ||||||
|  |                     cur.execute("UPDATE ranges SET end_time=?, end_pos=? " | ||||||
|  |                                 "WHERE stream_id=? AND start_time=? AND " | ||||||
|  |                                 "end_time=? AND start_pos=? AND end_pos=?", | ||||||
|  |                                 (new_etime, new_epos, sid, stime, etime, | ||||||
|  |                                  spos, epos)) | ||||||
|  |                     if cur.rowcount != 1:  # pragma: no cover (shouldn't fail) | ||||||
|  |                         raise FsckError("failed to fix SQL database") | ||||||
|  |                 raise RetryFsck | ||||||
|  |             err("actually it can't be truncated; times are bad too\n") | ||||||
|  |  | ||||||
|  |         # Otherwise, the only hope is to delete the interval entirely. | ||||||
|  |         err("*** Deleting the entire interval from SQL.\n") | ||||||
|  |         err("This may leave stale data on disk.  To fix that, copy all\n") | ||||||
|  |         err("data from this stream to a new stream using nilm-copy, then\n") | ||||||
|  |         err("remove all data from and destroy %s.\n", path) | ||||||
|  |         with self.sql: | ||||||
|  |             cur = self.sql.cursor() | ||||||
|  |             cur.execute("DELETE FROM ranges WHERE " | ||||||
|  |                         "stream_id=? AND start_time=? AND " | ||||||
|  |                         "end_time=? AND start_pos=? AND end_pos=?", | ||||||
|  |                         (sid, stime, etime, spos, epos)) | ||||||
|  |             if cur.rowcount != 1:  # pragma: no cover (shouldn't fail) | ||||||
|  |                 raise FsckError("failed to remove interval") | ||||||
|  |         raise RetryFsck | ||||||
|  |  | ||||||
|  |     ### Check data in each interval | ||||||
|  |  | ||||||
|  |     def check_data(self): | ||||||
|  |         total_rows = sum(sum((y[3] - y[2]) for y in x) | ||||||
|  |                          for x in list(self.stream_interval.values())) | ||||||
|  |         log("checking %s rows of data\n", "{:,d}".format(total_rows)) | ||||||
|  |         done = 0 | ||||||
|  |         with Progress(total_rows) as pbar: | ||||||
|  |             for sid in self.stream_interval: | ||||||
|  |                 try: | ||||||
|  |                     bulk = self.bulkpath + self.stream_path[sid] | ||||||
|  |                     bulk = bulk.encode('utf-8') | ||||||
|  |                     tab = nilmdb.server.bulkdata.Table(bulk) | ||||||
|  |  | ||||||
|  |                     def update(x): | ||||||
|  |                         pbar.update(done + x) | ||||||
|  |  | ||||||
|  |                     ints = self.stream_interval[sid] | ||||||
|  |                     done += self.check_table_data(sid, ints, tab, update) | ||||||
|  |                 finally: | ||||||
|  |                     tab.close() | ||||||
|  |  | ||||||
|  |     def check_table_data(self, sid, ints, tab, update): | ||||||
|  |         """Verify the timestamps stored for each interval in `ints`. | ||||||
|  |  | ||||||
|  |         Rows are read in chunks of at most `maxrows` (100000 unless the | ||||||
|  |         instance has a `maxrows_override` attribute).  For every chunk, | ||||||
|  |         all timestamps must lie in [start_time, end_time), must be | ||||||
|  |         strictly increasing, and the first one must be greater than the | ||||||
|  |         last timestamp of the previous chunk of the same interval. | ||||||
|  |  | ||||||
|  |         update(n) is called with the number of rows checked so far in | ||||||
|  |         this table.  Returns that total.  Raises FsckError on the first | ||||||
|  |         violation; none of these problems is repaired automatically.""" | ||||||
|  |         # Pull out all of the interval's data and verify that it's | ||||||
|  |         # monotonic. | ||||||
|  |         maxrows = getattr(self, 'maxrows_override', 100000) | ||||||
|  |         path = self.stream_path[sid] | ||||||
|  |         layout = self.stream_layout[sid] | ||||||
|  |         dtype = nilmdb.client.numpyclient.layout_to_dtype(layout) | ||||||
|  |         tab.file_open.cache_remove_all() | ||||||
|  |         done = 0 | ||||||
|  |         for intv in ints: | ||||||
|  |             # Chunk-to-chunk ordering is tracked per interval only. | ||||||
|  |             last_ts = None | ||||||
|  |             (stime, etime, spos, epos) = intv | ||||||
|  |  | ||||||
|  |             # Break interval into maxrows-sized chunks | ||||||
|  |             next_start = spos | ||||||
|  |             while next_start < epos: | ||||||
|  |                 start = next_start | ||||||
|  |                 stop = min(start + maxrows, epos) | ||||||
|  |                 count = stop - start | ||||||
|  |                 next_start = stop | ||||||
|  |  | ||||||
|  |                 # Get raw data, convert to NumPy array | ||||||
|  |                 try: | ||||||
|  |                     raw = tab.get_data(start, stop, binary=True) | ||||||
|  |                     data = numpy.frombuffer(raw, dtype) | ||||||
|  |                 except Exception as e:  # pragma: no cover | ||||||
|  |                     # No coverage because it's hard to trigger this -- earlier | ||||||
|  |                     # checks check the ranges, so this would probably be a real | ||||||
|  |                     # disk error, malloc failure, etc. | ||||||
|  |                     raise FsckError( | ||||||
|  |                         "%s: failed to grab rows %d through %d: %s", | ||||||
|  |                         path, start, stop, repr(e)) | ||||||
|  |  | ||||||
|  |                 ts = data['timestamp'] | ||||||
|  |  | ||||||
|  |                 # Verify that all timestamps are in range. | ||||||
|  |                 match = (ts < stime) | (ts >= etime) | ||||||
|  |                 if match.any(): | ||||||
|  |                     # argmax on a boolean array gives the first True. | ||||||
|  |                     row = numpy.argmax(match) | ||||||
|  |                     raise FsckError("%s: data timestamp %d at row %d " | ||||||
|  |                                     "outside interval range [%d,%d)", | ||||||
|  |                                     path, ts[row], row + start, | ||||||
|  |                                     stime, etime) | ||||||
|  |  | ||||||
|  |                 # Verify that timestamps are monotonic | ||||||
|  |                 match = numpy.diff(ts) <= 0 | ||||||
|  |                 if match.any(): | ||||||
|  |                     row = numpy.argmax(match) | ||||||
|  |                     raise FsckError("%s: non-monotonic timestamp (%d -> %d)" | ||||||
|  |                                     " at row %d", path, ts[row], ts[row+1], | ||||||
|  |                                     row + start) | ||||||
|  |  | ||||||
|  |                 # Chunks of one interval must also be ordered relative | ||||||
|  |                 # to each other. | ||||||
|  |                 first_ts = ts[0] | ||||||
|  |                 if last_ts is not None and first_ts <= last_ts: | ||||||
|  |                     raise FsckError("%s: first interval timestamp %d is not " | ||||||
|  |                                     "greater than the previous last interval " | ||||||
|  |                                     "timestamp %d, at row %d", | ||||||
|  |                                     path, first_ts, last_ts, start) | ||||||
|  |                 last_ts = ts[-1] | ||||||
|  |  | ||||||
|  |                 # The previous errors are fixable, by removing the | ||||||
|  |                 # offending intervals, or changing the data | ||||||
|  |                 # timestamps.  But these are probably unlikely errors, | ||||||
|  |                 # so it's not worth implementing that yet. | ||||||
|  |  | ||||||
|  |                 # Done | ||||||
|  |                 done += count | ||||||
|  |                 update(done) | ||||||
|  |         return done | ||||||
| @@ -1,205 +0,0 @@ | |||||||
| """Interval and IntervalSet |  | ||||||
|  |  | ||||||
| Represents an interval of time, and a sorted set of such intervals""" |  | ||||||
|  |  | ||||||
| from datetime import datetime |  | ||||||
| import bisect |  | ||||||
|  |  | ||||||
| class IntervalError(Exception): |  | ||||||
|     """Error due to interval overlap, etc. |  | ||||||
|  |  | ||||||
|     Raised by Interval for non-datetime endpoints, for a start that |  | ||||||
|     lies after the end, and for a subset() outside the interval.""" |  | ||||||
|     pass |  | ||||||
|  |  | ||||||
| class Interval(object): |  | ||||||
|     """Represents an interval of time""" |  | ||||||
|  |  | ||||||
|     # Class-level defaults; __setattr__ compares the types of start and |  | ||||||
|     # end, so both names must exist before either is assigned. |  | ||||||
|     start = None |  | ||||||
|     end = None |  | ||||||
|  |  | ||||||
|     def __init__(self, start, end): |  | ||||||
|         # Both assignments go through __setattr__ and are validated there. |  | ||||||
|         self.start = start |  | ||||||
|         self.end = end |  | ||||||
|  |  | ||||||
|     def __repr__(self): |  | ||||||
|         return "Interval(" + repr(self.start) + ", " + repr(self.end) + ")" |  | ||||||
|  |  | ||||||
|     def __str__(self): |  | ||||||
|         return "[" + str(self.start) + " -> " + str(self.end) + "]" |  | ||||||
|  |  | ||||||
|     def __setattr__(self, name, value): |  | ||||||
|         """Set attribute""" |  | ||||||
|         # TODO: If we need to manipulate file names, offsets, lengths, etc, |  | ||||||
|         # based on start and end time changing, maybe this is the right spot? |  | ||||||
|         # Or we could just disallow changing it here. |  | ||||||
|         # Every attribute, whatever its name, must be a datetime. |  | ||||||
|         if not isinstance(value, datetime): |  | ||||||
|             raise IntervalError("Must set datetime values") |  | ||||||
|         self.__dict__[name] = value |  | ||||||
|         # Ordering is only checked once both endpoints have the same |  | ||||||
|         # type, i.e. not while one of them is still the None default. |  | ||||||
|         # Note the new value is already stored when the check fails. |  | ||||||
|         if (type(self.start) is type(self.end)): |  | ||||||
|             if (self.start > self.end): |  | ||||||
|                 raise IntervalError("Interval start must precede interval end") |  | ||||||
|  |  | ||||||
|     # NOTE(review): __cmp__ is the Python 2 comparison hook; Python 3 |  | ||||||
|     # does not call it. |  | ||||||
|     def __cmp__(self, other): |  | ||||||
|         """Compare two intervals.  If non-equal, order by start then end""" |  | ||||||
|         if not isinstance(other, Interval): |  | ||||||
|             raise TypeError("Can't compare to non-interval") |  | ||||||
|         if (self.start == other.start): |  | ||||||
|             if (self.end < other.end): |  | ||||||
|                 return -1 |  | ||||||
|             if (self.end > other.end): |  | ||||||
|                 return 1 |  | ||||||
|             return 0 |  | ||||||
|         if (self.start < other.start): |  | ||||||
|             return -1 |  | ||||||
|         return 1 |  | ||||||
|                  |  | ||||||
|     def intersects(self, other): |  | ||||||
|         """Return True if two Interval objects intersect""" |  | ||||||
|         if (not isinstance(other, Interval)): |  | ||||||
|             raise TypeError("need Interval for intersection test") |  | ||||||
|         # Intervals that merely touch at an endpoint do not intersect. |  | ||||||
|         if (self.end <= other.start or |  | ||||||
|             self.start >= other.end): |  | ||||||
|             return False |  | ||||||
|         else: |  | ||||||
|             return True |  | ||||||
|  |  | ||||||
|     def is_adjacent(self, other): |  | ||||||
|         """Return True if two Intervals are adjacent (same end or start)""" |  | ||||||
|         if (not isinstance(other, Interval)): |  | ||||||
|             raise TypeError("need Interval for adjacency test") |  | ||||||
|         if (self.end == other.start or |  | ||||||
|             self.start == other.end): |  | ||||||
|             return True |  | ||||||
|         else: |  | ||||||
|             return False |  | ||||||
|  |  | ||||||
|     def subset(self, start, end): |  | ||||||
|         """Return a new Interval that is a subset of this one""" |  | ||||||
|         # TODO: Any magic regarding file/offset/length mapping for subsets |  | ||||||
|         # Raises IntervalError when [start, end] reaches outside self. |  | ||||||
|         if (start < self.start or end > self.end): |  | ||||||
|             raise IntervalError("not a subset") |  | ||||||
|         return Interval(start, end)             |  | ||||||
|  |  | ||||||
class IntervalSet(object):
    """A collection of Interval objects in which no two intersect.

    The intervals are stored in ``self.data``, which is kept sorted."""

    def __init__(self, iterable=None):
        """Create a set, optionally populated from a single Interval
        or from an iterable of Intervals."""
        self.data = []
        if iterable is None:
            return
        # Accept a lone Interval by wrapping it in a list
        if isinstance(iterable, Interval):
            iterable = [iterable]
        self._add_intervals(iterable)

    def __iter__(self):
        return iter(self.data)

    def __repr__(self):
        return "IntervalSet(%s)" % (repr(list(self.data)),)

    def __cmp__(self, other):
        # IntervalSets have no meaningful ordering, so refuse outright
        raise TypeError("can't compare IntervalSets with cmp()")

    def __eq__(self, other):
        """Check whether two IntervalSets cover the same spans.

        A run of adjacent Intervals is considered equivalent to one
        long Interval, so the way a span is split up does not matter.
        Anything that is not an IntervalSet compares unequal."""
        if not isinstance(other, IntervalSet):
            return False
        mine = 0
        theirs = 0
        in_span = False
        try:
            while True:
                if not in_span:
                    # Between spans: either both sets are exhausted...
                    if mine >= len(self) and theirs >= len(other):
                        return True
                    # ...or the next span must begin at the same point
                    if self[mine].start != other[theirs].start:
                        return False
                    in_span = True
                    continue

                # Inside a span: matching ends close it on both sides
                if self[mine].end == other[theirs].end:
                    mine += 1
                    theirs += 1
                    in_span = False
                elif self[mine].end < other[theirs].end:
                    # Our piece ends first, so the span must continue
                    # with an adjacent piece on our side
                    if not self[mine].is_adjacent(self[mine + 1]):
                        return False
                    mine += 1
                else:
                    # Their piece ends first; same requirement for them
                    if not other[theirs].is_adjacent(other[theirs + 1]):
                        return False
                    theirs += 1
        except IndexError:
            # One side ran out of intervals while the other had more
            return False

    def __ne__(self, other):
        equal = self.__eq__(other)
        return not equal

    def __len__(self):
        return len(self.data)

    def __getitem__(self, key):
        return self.data[key]

    def __iadd__(self, other):
        """In-place addition of an Interval or an iterable of Intervals.

        Raises an exception if anything being added intersects an
        interval already in the set."""
        intervals = [other] if isinstance(other, Interval) else other
        self._add_intervals(intervals)
        return self

    def __add__(self, other):
        """Addition that leaves both operands alone and returns a new set.

        Raises an exception if the regions being added intersect."""
        result = IntervalSet(self)
        result += IntervalSet(other)
        return result

    def __and__(self, other):
        """Intersect this set with an Interval or another IntervalSet.

        The resulting intervals are created as subsets of the
        intervals held by self (the left-hand operand)."""
        result = IntervalSet()

        # For a set, accumulate the intersection with each member
        if isinstance(other, IntervalSet):
            for piece in other.data:
                result += self & piece
            return result

        if not isinstance(other, Interval):
            raise TypeError("can't intersect with that type")

        for mine in self.data:
            # Keep only the overlapping region, if there is one
            if (mine.end > other.start and mine.start < other.end):
                lo = max(mine.start, other.start)
                hi = min(mine.end, other.end)
                result += mine.subset(lo, hi)
        return result

    def _add_intervals(self, iterable):
        """Insert every Interval produced by an iterable"""
        for element in iterable:
            self._add_single_interval(element)

    def _add_single_interval(self, interval):
        """Insert a single Interval, keeping the data sorted.

        Raises TypeError for non-Intervals and IntervalError if the
        new interval intersects one that is already present."""
        if not isinstance(interval, Interval):
            raise TypeError("can only add Intervals")
        if any(existing.intersects(interval) for existing in self.data):
            raise IntervalError("Tried to add overlapping interval "
                                "to this set")
        bisect.insort(self.data, interval)
							
								
								
									
										1
									
								
								nilmdb/scripts/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								nilmdb/scripts/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | |||||||
|  | # Command line scripts | ||||||
							
								
								
									
										27
									
								
								nilmdb/scripts/nilmdb_fsck.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										27
									
								
								nilmdb/scripts/nilmdb_fsck.py
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,27 @@ | |||||||
|  | #!/usr/bin/env python3 | ||||||
|  |  | ||||||
|  | import nilmdb.fsck | ||||||
|  | import argparse | ||||||
|  |  | ||||||
|  |  | ||||||
def main():
    """Entry point for the 'nilmdb-fsck' command line script.

    Parses the command line, then runs the consistency check on the
    given database directory."""

    parser = argparse.ArgumentParser(
        description='Check database consistency',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "-v", "--version", action="version", version=nilmdb.__version__)
    parser.add_argument(
        "-f", "--fix", action="store_true", default=False,
        help='Fix errors when possible (which may involve removing data)')
    parser.add_argument(
        "-n", "--no-data", action="store_true", default=False,
        help='Skip the slow full-data check')
    parser.add_argument('database', help='Database directory')
    options = parser.parse_args()

    checker = nilmdb.fsck.Fsck(options.database, options.fix)
    checker.check(skip_data=options.no_data)
|  |  | ||||||
|  |  | ||||||
|  | if __name__ == "__main__": | ||||||
|  |     main() | ||||||
							
								
								
									
										99
									
								
								nilmdb/scripts/nilmdb_server.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										99
									
								
								nilmdb/scripts/nilmdb_server.py
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,99 @@ | |||||||
|  | #!/usr/bin/env python3 | ||||||
|  |  | ||||||
|  | import os | ||||||
|  | import sys | ||||||
|  | import socket | ||||||
|  | import argparse | ||||||
|  |  | ||||||
|  | import cherrypy | ||||||
|  |  | ||||||
|  | import nilmdb.server | ||||||
|  |  | ||||||
|  |  | ||||||
def main():
    """Main entry point for the 'nilmdb-server' command line script.

    Parses the command line, opens the database, builds the server and
    runs it (blocking) until it exits.  The database is always closed on
    the way out, whether or not --quiet was given.  With --yappi the
    server runs under the yappi profiler and the collected statistics
    are printed afterwards.
    """

    parser = argparse.ArgumentParser(
        description='Run the NilmDB server',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument("-v", "--version", action="version",
                        version=nilmdb.__version__)

    group = parser.add_argument_group("Standard options")
    group.add_argument('-a', '--address',
                       help='Only listen on the given address',
                       default='0.0.0.0')
    group.add_argument('-p', '--port', help='Listen on the given port',
                       type=int, default=12380)
    group.add_argument('-d', '--database', help='Database directory',
                       default="./db")
    group.add_argument('-q', '--quiet', help='Silence output',
                       action='store_true')
    group.add_argument('-t', '--traceback',
                       help='Provide tracebacks in client errors',
                       action='store_true', default=False)

    group = parser.add_argument_group("Debug options")
    group.add_argument('-y', '--yappi', help='Run under yappi profiler and '
                       'invoke interactive shell afterwards',
                       action='store_true')

    args = parser.parse_args()

    # Create database object.  Needs to be serialized before passing
    # to the Server.
    db = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)(args.database)

    # Configure the server
    if not args.quiet:
        cherrypy._cpconfig.environments['embedded']['log.screen'] = True

    server = nilmdb.server.Server(db,
                                  host=args.address,
                                  port=args.port,
                                  force_traceback=args.traceback)

    # Print info
    if not args.quiet:
        print("Version: %s" % nilmdb.__version__)
        print("Database: %s" % (os.path.realpath(args.database)))
        if args.address == '0.0.0.0' or args.address == '::':
            # Listening on a wildcard address: show a name clients can use
            host = socket.getfqdn()
        else:
            host = args.address
        print("Server URL: http://%s:%d/" % (host, args.port))
        print("----")

    # Run it
    try:
        if args.yappi:
            print("Running in yappi")
            # Import and start the profiler *before* entering the inner
            # try block, so that a missing yappi module is reported as
            # an ImportError instead of being masked by a failing
            # yappi.stop() in the cleanup code.
            import yappi
            yappi.start()
            try:
                server.start(blocking=True)
            finally:
                yappi.stop()
                stats = yappi.get_func_stats()
                stats.sort("ttot")
                stats.print_all()
                try:
                    from IPython import embed
                    embed(header="Use the `yappi` or `stats` object to "
                          "explore further, `quit` to exit")
                except ModuleNotFoundError:
                    print("\nInstall ipython to explore further")
        else:
            server.start(blocking=True)
    except nilmdb.server.serverutil.CherryPyExit:
        print("Exiting due to CherryPy error", file=sys.stderr)
        raise
    finally:
        # Only the message is optional; the database must be closed
        # even when running with --quiet.
        if not args.quiet:
            print("Closing database")
        db.close()
|  |  | ||||||
|  |  | ||||||
|  | if __name__ == "__main__": | ||||||
|  |     main() | ||||||
							
								
								
									
										12
									
								
								nilmdb/scripts/nilmtool.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										12
									
								
								nilmdb/scripts/nilmtool.py
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,12 @@ | |||||||
|  | #!/usr/bin/env python3 | ||||||
|  |  | ||||||
|  | import nilmdb.cmdline | ||||||
|  |  | ||||||
|  |  | ||||||
def main():
    """Entry point for the 'nilmtool' command line script: build the
    command line handler and run it."""
    cmdline = nilmdb.cmdline.Cmdline()
    cmdline.run()
|  |  | ||||||
|  |  | ||||||
|  | if __name__ == "__main__": | ||||||
|  |     main() | ||||||
							
								
								
									
										9
									
								
								nilmdb/server/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								nilmdb/server/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | |||||||
|  | """nilmdb.server""" | ||||||
|  |  | ||||||
|  | # Set up pyximport to automatically rebuild Cython modules if needed. | ||||||
|  | import pyximport | ||||||
|  | pyximport.install(inplace=True, build_in_temp=False) | ||||||
|  |  | ||||||
|  | from nilmdb.server.nilmdb import NilmDB | ||||||
|  | from nilmdb.server.server import Server, wsgi_application | ||||||
|  | from nilmdb.server.errors import NilmDBError, StreamError, OverlapError | ||||||
							
								
								
									
										635
									
								
								nilmdb/server/bulkdata.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										635
									
								
								nilmdb/server/bulkdata.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,635 @@ | |||||||
|  | # Fixed record size bulk data storage | ||||||
|  |  | ||||||
|  | import os | ||||||
|  | import re | ||||||
|  | import sys | ||||||
|  | import pickle | ||||||
|  | import tempfile | ||||||
|  |  | ||||||
|  | from nilmdb.utils.printf import sprintf | ||||||
|  | from nilmdb.utils.time import timestamp_to_string | ||||||
|  | import nilmdb.utils | ||||||
|  |  | ||||||
|  | import nilmdb.utils.lock | ||||||
|  | from . import rocket | ||||||
|  |  | ||||||
|  | # Up to 256 open file descriptors at any given time. | ||||||
|  | # These variables are global so they can be used in the decorator arguments. | ||||||
|  | table_cache_size = 32 | ||||||
|  | fd_cache_size = 8 | ||||||
|  |  | ||||||
|  |  | ||||||
@nilmdb.utils.must_close(wrap_verify=False)
class BulkData():
    """Manager for the on-disk tree of bulk data tables.

    All tables live below ``<basepath>/data``.  A lock file named
    ``<basepath>/data.lock`` is created and exclusively locked when the
    object is constructed, and released and removed again by close().
    Paths are always manipulated internally as bytes.
    """

    def __init__(self, basepath, **kwargs):
        """Open (creating the data directory if needed) and lock the store.

        basepath: database directory, as str or bytes.

        Optional keyword arguments (None selects the default):
          file_size: approximate size of each data file, in bytes
          files_per_dir: number of files per directory
          initial_nrows: row number passed to newly opened Tables

        Raises IOError if another process already holds the lock.
        """
        if isinstance(basepath, str):
            self.basepath = self._encode_filename(basepath)
        else:
            self.basepath = basepath
        self.root = os.path.join(self.basepath, b"data")
        self.lock = self.root + b".lock"
        self.lockfile = None

        # Tuneables
        file_size = kwargs.get("file_size")
        if file_size is None:
            # Default to approximately 128 MiB per file
            file_size = 128 * 1024 * 1024
        self.file_size = file_size

        files_per_dir = kwargs.get("files_per_dir")
        if files_per_dir is None:
            # 32768 files per dir should work even on FAT32
            files_per_dir = 32768
        self.files_per_dir = files_per_dir

        initial_nrows = kwargs.get("initial_nrows")
        if initial_nrows is None:
            # First row is 0
            initial_nrows = 0
        self.initial_nrows = initial_nrows

        # Make root path
        if not os.path.isdir(self.root):
            os.mkdir(self.root)

        # Create the lock.  Only remember the file object once we really
        # hold the lock: if somebody else has it, close our handle right
        # away so it does not leak, and so that a later close() will not
        # unlock or delete a lock file that belongs to another process.
        lockfile = open(self.lock, "w")
        if not nilmdb.utils.lock.exclusive_lock(lockfile):
            lockfile.close()
            raise IOError('database at "' +
                          self._decode_filename(self.basepath) +
                          '" is already locked by another process')
        self.lockfile = lockfile

    def close(self):
        """Close all cached tables, then release and remove the lock file."""
        self.getnode.cache_remove_all()
        if self.lockfile:
            nilmdb.utils.lock.exclusive_unlock(self.lockfile)
            self.lockfile.close()
            try:
                os.unlink(self.lock)
            except OSError:
                # Best effort; a leftover lock file is harmless
                pass
            self.lockfile = None

    def _encode_filename(self, path):
        # Translate unicode strings to raw bytes, if needed.  We
        # always manipulate paths internally as bytes.
        return path.encode('utf-8')

    def _decode_filename(self, path):
        # Translate raw bytes to unicode strings, escaping if needed
        return path.decode('utf-8', errors='backslashreplace')

    def _create_check_ospath(self, ospath):
        """Raise ValueError if a table cannot be created at OS path
        'ospath' (bytes)."""
        if ospath[-1:] == b'/':
            raise ValueError("invalid path; should not end with a /")
        if Table.exists(ospath):
            raise ValueError("stream already exists at this path")
        if os.path.isdir(ospath):
            # Look for any files in subdirectories.  Fully empty subdirectories
            # are OK; they might be there during a rename
            for (_root, _dirs, files) in os.walk(ospath):
                if files:
                    raise ValueError(
                        "non-empty subdirs of this path already exist")

    def _create_parents(self, unicodepath):
        """Verify the path name, and create parent directories if they
        don't exist.  Returns the list of path elements (bytes) that
        make up 'unicodepath'.  Raises ValueError for invalid paths."""
        path = self._encode_filename(unicodepath)

        if path[0:1] != b'/':
            raise ValueError("paths must start with / ")
        [group, node] = path.rsplit(b"/", 1)
        if group == b'':
            raise ValueError("invalid path; path must contain at least one "
                             "folder")
        if node == b'':
            raise ValueError("invalid path; should not end with a /")
        if not Table.valid_path(path):
            raise ValueError("path name is invalid or contains reserved words")

        # Create the table's base dir.  Note that we make a
        # distinction here between NilmDB paths (always Unix style,
        # split apart manually) and OS paths (built up with
        # os.path.join)

        # Make directories leading up to this one
        elements = path.lstrip(b'/').split(b'/')
        made_dirs = []
        try:
            # Make parent elements
            for i in range(len(elements)):
                ospath = os.path.join(self.root, *elements[0:i])
                if Table.exists(ospath):
                    raise ValueError("path is subdir of existing node")
                if not os.path.isdir(ospath):
                    os.mkdir(ospath)
                    made_dirs.append(ospath)
        except Exception:
            # Remove paths that we created, deepest first
            for ospath in reversed(made_dirs):
                os.rmdir(ospath)
            raise

        return elements

    def create(self, unicodepath, layout_name):
        """
        unicodepath: path to the data (e.g. u'/newton/prep').
        Paths must contain at least two elements, e.g.:
           /newton/prep
           /newton/raw
           /newton/upstairs/prep
           /newton/upstairs/raw

        layout_name: string for nilmdb.layout.get_named(), e.g. 'float32_8'
        """
        elements = self._create_parents(unicodepath)

        # Make the final dir
        ospath = os.path.join(self.root, *elements)
        self._create_check_ospath(ospath)
        os.mkdir(ospath)

        try:
            # Write format string to file
            Table.create(ospath, layout_name, self.file_size,
                         self.files_per_dir)

            # Open and cache it
            self.getnode(unicodepath)
        except Exception:
            # Best-effort removal of the directory we just made, then
            # re-raise the original error with its traceback intact.
            try:
                os.rmdir(ospath)
            except OSError:
                pass
            raise

    def _remove_leaves(self, unicodepath):
        """Remove empty directories starting at the leaves of unicodepath"""
        path = self._encode_filename(unicodepath)
        elements = path.lstrip(b'/').split(b'/')
        # Walk from the deepest directory up towards the root
        for depth in range(len(elements), 0, -1):
            ospath = os.path.join(self.root, *elements[0:depth])
            try:
                os.rmdir(ospath)
            except OSError:
                # Not empty (or already gone): leave it alone
                pass

    def rename(self, oldunicodepath, newunicodepath):
        """Move entire tree from 'oldunicodepath' to
        'newunicodepath'.  Raises ValueError if the paths are the same
        or the destination is not usable."""
        oldpath = self._encode_filename(oldunicodepath)
        newpath = self._encode_filename(newunicodepath)

        # Get OS paths
        oldelements = oldpath.lstrip(b'/').split(b'/')
        oldospath = os.path.join(self.root, *oldelements)
        newelements = newpath.lstrip(b'/').split(b'/')
        newospath = os.path.join(self.root, *newelements)

        # Basic checks
        if oldospath == newospath:
            raise ValueError("old and new paths are the same")

        # Remove Table object at old path from cache
        self.getnode.cache_remove(self, oldunicodepath)

        # Move the table to a temporary location
        tmpdir = tempfile.mkdtemp(prefix=b"rename-", dir=self.root)
        tmppath = os.path.join(tmpdir, b"table")
        try:
            os.rename(oldospath, tmppath)
        except Exception:
            # Nothing was moved (e.g. the old path does not exist), so
            # don't leave the temporary directory behind.
            os.rmdir(tmpdir)
            raise

        try:
            # Check destination path
            self._create_check_ospath(newospath)

            # Create parent dirs for new location
            self._create_parents(newunicodepath)

            # Move table into new location
            os.rename(tmppath, newospath)
        except Exception:
            # On failure, move the table back to original path
            os.rename(tmppath, oldospath)
            os.rmdir(tmpdir)
            raise

        # Prune old dirs
        self._remove_leaves(oldunicodepath)
        os.rmdir(tmpdir)

    def destroy(self, unicodepath):
        """Fully remove all data at a particular path.  No way to undo
        it!  The group/path structure is removed, too.  Raises
        ValueError if there is no table at that path."""
        path = self._encode_filename(unicodepath)

        # Get OS path
        elements = path.lstrip(b'/').split(b'/')
        ospath = os.path.join(self.root, *elements)

        # Remove Table object from cache
        self.getnode.cache_remove(self, unicodepath)

        # Remove the contents of the target directory
        if not Table.exists(ospath):
            raise ValueError("nothing at that path")
        # Bottom-up walk, so directories are empty by the time we reach them
        for (root, dirs, files) in os.walk(ospath, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
            for name in dirs:
                os.rmdir(os.path.join(root, name))

        # Remove leftover empty directories
        self._remove_leaves(unicodepath)

    # Cache open tables
    @nilmdb.utils.lru_cache(size=table_cache_size,
                            onremove=lambda x: x.close())
    def getnode(self, unicodepath):
        """Return a Table object corresponding to the given database
        path, which must exist."""
        path = self._encode_filename(unicodepath)
        elements = path.lstrip(b'/').split(b'/')
        ospath = os.path.join(self.root, *elements)
        return Table(ospath, self.initial_nrows)
|  |  | ||||||
|  |  | ||||||
|  | @nilmdb.utils.must_close(wrap_verify=False) | ||||||
|  | class Table(): | ||||||
|  |     """Tools to help access a single table (data at a specific OS path).""" | ||||||
|  |     # See design.md for design details | ||||||
|  |  | ||||||
|  |     # Class methods, to help keep format details in this class. | ||||||
|  |     @classmethod | ||||||
|  |     def valid_path(cls, root): | ||||||
|  |         """Return True if a root path is a valid name""" | ||||||
|  |         return b"_format" not in root.split(b"/") | ||||||
|  |  | ||||||
|  |     @classmethod | ||||||
|  |     def exists(cls, root): | ||||||
|  |         """Return True if a table appears to exist at this OS path""" | ||||||
|  |         return os.path.isfile(os.path.join(root, b"_format")) | ||||||
|  |  | ||||||
|  |     @classmethod | ||||||
|  |     def create(cls, root, layout, file_size, files_per_dir): | ||||||
|  |         """Initialize a table at the given OS path with the | ||||||
|  |         given layout string""" | ||||||
|  |  | ||||||
|  |         # Calculate rows per file so that each file is approximately | ||||||
|  |         # file_size bytes. | ||||||
|  |         rkt = rocket.Rocket(layout, None) | ||||||
|  |         rows_per_file = max(file_size // rkt.binary_size, 1) | ||||||
|  |         rkt.close() | ||||||
|  |  | ||||||
|  |         fmt = { | ||||||
|  |             "rows_per_file": rows_per_file, | ||||||
|  |             "files_per_dir": files_per_dir, | ||||||
|  |             "layout": layout, | ||||||
|  |             "version": 3 | ||||||
|  |         } | ||||||
|  |         nilmdb.utils.atomic.replace_file( | ||||||
|  |             os.path.join(root, b"_format"), pickle.dumps(fmt, 2)) | ||||||
|  |  | ||||||
|  |     # Normal methods | ||||||
|  |     def __init__(self, root, initial_nrows=0): | ||||||
|  |         """'root' is the full OS path to the directory of this table""" | ||||||
|  |         self.root = root | ||||||
|  |         self.initial_nrows = initial_nrows | ||||||
|  |  | ||||||
|  |         # Load the format | ||||||
|  |         with open(os.path.join(self.root, b"_format"), "rb") as f: | ||||||
|  |             fmt = pickle.load(f) | ||||||
|  |  | ||||||
|  |         if fmt["version"] != 3: | ||||||
|  |             # Old versions used floating point timestamps, which aren't | ||||||
|  |             # valid anymore. | ||||||
|  |             raise NotImplementedError("old version " + str(fmt["version"]) + | ||||||
|  |                                       " bulk data store is not supported") | ||||||
|  |  | ||||||
|  |         self.rows_per_file = fmt["rows_per_file"] | ||||||
|  |         self.files_per_dir = fmt["files_per_dir"] | ||||||
|  |         self.layout = fmt["layout"] | ||||||
|  |  | ||||||
|  |         # Use rocket to get row size and file size | ||||||
|  |         rkt = rocket.Rocket(self.layout, None) | ||||||
|  |         self.row_size = rkt.binary_size | ||||||
|  |         self.file_size = rkt.binary_size * self.rows_per_file | ||||||
|  |         rkt.close() | ||||||
|  |  | ||||||
|  |         # Find nrows | ||||||
|  |         self.nrows = self._get_nrows() | ||||||
|  |  | ||||||
|  |     def close(self): | ||||||
|  |         self.file_open.cache_remove_all() | ||||||
|  |  | ||||||
|  |     # Internal helpers | ||||||
|  |     def _get_nrows(self): | ||||||
|  |         """Find nrows by locating the lexicographically last filename | ||||||
|  |         and using its size""" | ||||||
|  |         # Note that this just finds a 'nrows' that is guaranteed to be | ||||||
|  |         # greater than the row number of any piece of data that | ||||||
|  |         # currently exists, not necessarily all data that _ever_ | ||||||
|  |         # existed. | ||||||
|  |         regex = re.compile(b"^[0-9a-f]{4,}$") | ||||||
|  |  | ||||||
|  |         # Find the last directory.  We sort and loop through all of them, | ||||||
|  |         # starting with the numerically greatest, because the dirs could be | ||||||
|  |         # empty if something was deleted but the directory was unexpectedly | ||||||
|  |         # not deleted. | ||||||
|  |         subdirs = sorted(filter(regex.search, os.listdir(self.root)), | ||||||
|  |                          key=lambda x: int(x, 16), reverse=True) | ||||||
|  |  | ||||||
|  |         for subdir in subdirs: | ||||||
|  |             # Now find the last file in that dir | ||||||
|  |             path = os.path.join(self.root, subdir) | ||||||
|  |             files = list(filter(regex.search, os.listdir(path))) | ||||||
|  |             if not files: | ||||||
|  |                 # Empty dir: try the next one | ||||||
|  |                 continue | ||||||
|  |  | ||||||
|  |             # Find the numerical max | ||||||
|  |             filename = max(files, key=lambda x: int(x, 16)) | ||||||
|  |             offset = os.path.getsize(os.path.join(self.root, subdir, filename)) | ||||||
|  |  | ||||||
|  |             # Convert to row number | ||||||
|  |             return self._row_from_offset(subdir, filename, offset) | ||||||
|  |  | ||||||
|  |         # No files, so no data.  We typically start at row 0 in this | ||||||
|  |         # case, although initial_nrows is specified during some tests | ||||||
|  |         # to exercise other parts of the code better.  Since we have | ||||||
|  |         # no files yet, round initial_nrows up so it points to a row | ||||||
|  |         # that would begin a new file. | ||||||
|  |         nrows = ((self.initial_nrows + (self.rows_per_file - 1)) // | ||||||
|  |                  self.rows_per_file) * self.rows_per_file | ||||||
|  |         return nrows | ||||||
|  |  | ||||||
|  |     def _offset_from_row(self, row): | ||||||
|  |         """Return a (subdir, filename, offset, count) tuple: | ||||||
|  |  | ||||||
|  |           subdir: subdirectory for the file | ||||||
|  |         filename: the filename that contains the specified row | ||||||
|  |           offset: byte offset of the specified row within the file | ||||||
|  |            count: number of rows (starting at offset) that fit in the file | ||||||
|  |         """ | ||||||
|  |         filenum = row // self.rows_per_file | ||||||
|  |         # It's OK if these format specifiers are too short; the filenames | ||||||
|  |         # will just get longer but will still sort correctly. | ||||||
|  |         dirname = sprintf(b"%04x", filenum // self.files_per_dir) | ||||||
|  |         filename = sprintf(b"%04x", filenum % self.files_per_dir) | ||||||
|  |         offset = (row % self.rows_per_file) * self.row_size | ||||||
|  |         count = self.rows_per_file - (row % self.rows_per_file) | ||||||
|  |         return (dirname, filename, offset, count) | ||||||
|  |  | ||||||
|  |     def _row_from_offset(self, subdir, filename, offset): | ||||||
|  |         """Return the row number that corresponds to the given | ||||||
|  |         'subdir/filename' and byte-offset within that file.""" | ||||||
|  |         if (offset % self.row_size) != 0: | ||||||
|  |             # this shouldn't occur, unless there is some corruption somewhere | ||||||
|  |             raise ValueError("file offset is not a multiple of data size") | ||||||
|  |         filenum = int(subdir, 16) * self.files_per_dir + int(filename, 16) | ||||||
|  |         row = (filenum * self.rows_per_file) + (offset // self.row_size) | ||||||
|  |         return row | ||||||
|  |  | ||||||
|  |     def _remove_or_truncate_file(self, subdir, filename, offset=0): | ||||||
|  |         """Remove the given file, and remove the subdirectory too | ||||||
|  |         if it's empty.  If offset is nonzero, truncate the file | ||||||
|  |         to that size instead.""" | ||||||
|  |         # Close potentially open file in file_open LRU cache | ||||||
|  |         self.file_open.cache_remove(self, subdir, filename) | ||||||
|  |         if offset: | ||||||
|  |             # Truncate it | ||||||
|  |             with open(os.path.join(self.root, subdir, filename), "r+b") as f: | ||||||
|  |                 f.truncate(offset) | ||||||
|  |         else: | ||||||
|  |             # Remove file | ||||||
|  |             os.remove(os.path.join(self.root, subdir, filename)) | ||||||
|  |             # Try deleting subdir, too | ||||||
|  |             try: | ||||||
|  |                 os.rmdir(os.path.join(self.root, subdir)) | ||||||
|  |             except Exception: | ||||||
|  |                 pass | ||||||
|  |  | ||||||
|  |     # Cache open files | ||||||
|  |     @nilmdb.utils.lru_cache(size=fd_cache_size, | ||||||
|  |                             onremove=lambda f: f.close()) | ||||||
|  |     def file_open(self, subdir, filename): | ||||||
|  |         """Open and map a given 'subdir/filename' (relative to self.root). | ||||||
|  |         Will be automatically closed when evicted from the cache.""" | ||||||
|  |         # Create path if it doesn't exist | ||||||
|  |         try: | ||||||
|  |             os.mkdir(os.path.join(self.root, subdir)) | ||||||
|  |         except OSError: | ||||||
|  |             pass | ||||||
|  |         # Return a rocket.Rocket object, which contains the open file | ||||||
|  |         return rocket.Rocket(self.layout, | ||||||
|  |                              os.path.join(self.root, subdir, filename)) | ||||||
|  |  | ||||||
|  |     def append_data(self, data, start, end, binary=False): | ||||||
|  |         """Parse the formatted string in 'data', according to the | ||||||
|  |         current layout, and append it to the table.  If any timestamps | ||||||
|  |         are non-monotonic, or don't fall between 'start' and 'end', | ||||||
|  |         a ValueError is raised. | ||||||
|  |  | ||||||
|  |         Note that data is always of 'bytes' type. | ||||||
|  |  | ||||||
|  |         If 'binary' is True, the data should be in raw binary format | ||||||
|  |         instead: little-endian, matching the current table's layout, | ||||||
|  |         including the int64 timestamp. | ||||||
|  |  | ||||||
|  |         If this function succeeds, it returns normally.  Otherwise, | ||||||
|  |         the table is reverted back to its original state by truncating | ||||||
|  |         or deleting files as necessary.""" | ||||||
|  |         data_offset = 0 | ||||||
|  |         last_timestamp = nilmdb.utils.time.min_timestamp | ||||||
|  |         tot_rows = self.nrows | ||||||
|  |         count = 0 | ||||||
|  |         linenum = 0 | ||||||
|  |         try: | ||||||
|  |             while data_offset < len(data): | ||||||
|  |                 # See how many rows we can fit into the current file, | ||||||
|  |                 # and open it | ||||||
|  |                 (subdir, fname, offs, count) = self._offset_from_row(tot_rows) | ||||||
|  |                 f = self.file_open(subdir, fname) | ||||||
|  |  | ||||||
|  |                 # Ask the rocket object to parse and append up to "count" | ||||||
|  |                 # rows of data, verifying things along the way. | ||||||
|  |                 try: | ||||||
|  |                     if binary: | ||||||
|  |                         appender = f.append_binary | ||||||
|  |                     else: | ||||||
|  |                         appender = f.append_string | ||||||
|  |                     (added_rows, data_offset, last_timestamp, linenum | ||||||
|  |                      ) = appender(count, data, data_offset, linenum, | ||||||
|  |                                   start, end, last_timestamp) | ||||||
|  |                 except rocket.ParseError as e: | ||||||
|  |                     (linenum, colnum, errtype, obj) = e.args | ||||||
|  |                     if binary: | ||||||
|  |                         where = "byte %d: " % (linenum) | ||||||
|  |                     else: | ||||||
|  |                         where = "line %d, column %d: " % (linenum, colnum) | ||||||
|  |                     # Extract out the error line, add column marker | ||||||
|  |                     try: | ||||||
|  |                         if binary: | ||||||
|  |                             raise IndexError | ||||||
|  |                         bad = data.splitlines()[linenum-1] | ||||||
|  |                         bad += b'\n' + b' ' * (colnum - 1) + b'^' | ||||||
|  |                     except IndexError: | ||||||
|  |                         bad = b"" | ||||||
|  |                     if errtype == rocket.ERR_NON_MONOTONIC: | ||||||
|  |                         err = "timestamp is not monotonically increasing" | ||||||
|  |                     elif errtype == rocket.ERR_OUT_OF_INTERVAL: | ||||||
|  |                         if obj < start: | ||||||
|  |                             err = sprintf("Data timestamp %s < start time %s", | ||||||
|  |                                           timestamp_to_string(obj), | ||||||
|  |                                           timestamp_to_string(start)) | ||||||
|  |                         else: | ||||||
|  |                             err = sprintf("Data timestamp %s >= end time %s", | ||||||
|  |                                           timestamp_to_string(obj), | ||||||
|  |                                           timestamp_to_string(end)) | ||||||
|  |                     else: | ||||||
|  |                         err = str(obj) | ||||||
|  |                     bad_str = bad.decode('utf-8', errors='backslashreplace') | ||||||
|  |                     raise ValueError("error parsing input data: " + | ||||||
|  |                                      where + err + "\n" + bad_str) | ||||||
|  |                 tot_rows += added_rows | ||||||
|  |         except Exception: | ||||||
|  |             # Some failure, so try to roll things back by truncating or | ||||||
|  |             # deleting files that we may have appended data to. | ||||||
|  |             cleanpos = self.nrows | ||||||
|  |             while cleanpos <= tot_rows: | ||||||
|  |                 (subdir, fname, offs, count) = self._offset_from_row(cleanpos) | ||||||
|  |                 self._remove_or_truncate_file(subdir, fname, offs) | ||||||
|  |                 cleanpos += count | ||||||
|  |             # Re-raise original exception | ||||||
|  |             raise | ||||||
|  |         else: | ||||||
|  |             # Success, so update self.nrows accordingly | ||||||
|  |             self.nrows = tot_rows | ||||||
|  |  | ||||||
|  |     def get_data(self, start, stop, binary=False): | ||||||
|  |         """Extract data corresponding to Python range [start:stop], | ||||||
|  |         and return it as a single formatted bytes string""" | ||||||
|  |         if (start is None or stop is None or | ||||||
|  |                 start > stop or start < 0 or stop > self.nrows): | ||||||
|  |             raise IndexError("Index out of range") | ||||||
|  |  | ||||||
|  |         ret = [] | ||||||
|  |         row = start | ||||||
|  |         remaining = stop - start | ||||||
|  |         while remaining > 0: | ||||||
|  |             (subdir, filename, offset, count) = self._offset_from_row(row) | ||||||
|  |             if count > remaining: | ||||||
|  |                 count = remaining | ||||||
|  |             f = self.file_open(subdir, filename) | ||||||
|  |             if binary: | ||||||
|  |                 ret.append(f.extract_binary(offset, count)) | ||||||
|  |             else: | ||||||
|  |                 ret.append(f.extract_string(offset, count)) | ||||||
|  |             remaining -= count | ||||||
|  |             row += count | ||||||
|  |         return b"".join(ret) | ||||||
|  |  | ||||||
|  |     def __getitem__(self, row): | ||||||
|  |         """Extract the timestamp from a row, with table[n] notation.""" | ||||||
|  |         if row < 0 or row >= self.nrows: | ||||||
|  |             raise IndexError("Index out of range") | ||||||
|  |         (subdir, filename, offset, count) = self._offset_from_row(row) | ||||||
|  |         f = self.file_open(subdir, filename) | ||||||
|  |         return f.extract_timestamp(offset) | ||||||
|  |  | ||||||
|  |     def _remove_rows(self, subdir, filename, start, stop): | ||||||
|  |         """Helper to mark specific rows as being removed from a | ||||||
|  |         file, and potentially remove or truncate the file itself.""" | ||||||
|  |         # Close potentially open file in file_open LRU cache | ||||||
|  |         self.file_open.cache_remove(self, subdir, filename) | ||||||
|  |  | ||||||
|  |         # We keep a file like 0000.removed that contains a list of | ||||||
|  |         # which rows have been "removed".  Note that we never have to | ||||||
|  |         # remove entries from this list, because we never decrease | ||||||
|  |         # self.nrows, and so we will never overwrite those locations in the | ||||||
|  |         # file.  Only when the list covers the entire extent of the | ||||||
|  |         # file will that file be removed. | ||||||
|  |         datafile = os.path.join(self.root, subdir, filename) | ||||||
|  |         cachefile = datafile + b".removed" | ||||||
|  |         try: | ||||||
|  |             with open(cachefile, "rb") as f: | ||||||
|  |                 ranges = pickle.load(f) | ||||||
|  |             cachefile_present = True | ||||||
|  |         except Exception: | ||||||
|  |             ranges = [] | ||||||
|  |             cachefile_present = False | ||||||
|  |  | ||||||
|  |         # Append our new range and sort | ||||||
|  |         ranges.append((start, stop)) | ||||||
|  |         ranges.sort() | ||||||
|  |  | ||||||
|  |         # Merge adjacent ranges into "merged" | ||||||
|  |         merged = [] | ||||||
|  |         prev = None | ||||||
|  |         for new in ranges: | ||||||
|  |             if prev is None: | ||||||
|  |                 # No previous range, so remember this one | ||||||
|  |                 prev = new | ||||||
|  |             elif prev[1] == new[0]: | ||||||
|  |                 # Previous range connected to this new one; extend prev | ||||||
|  |                 prev = (prev[0], new[1]) | ||||||
|  |             else: | ||||||
|  |                 # Not connected; append previous and start again | ||||||
|  |                 merged.append(prev) | ||||||
|  |                 prev = new | ||||||
|  |         # Last range we were looking at goes into the file.  We know | ||||||
|  |         # there was at least one (the one we just appended above). | ||||||
|  |         merged.append(prev) | ||||||
|  |  | ||||||
|  |         # If the range covered the whole file, we can delete it now. | ||||||
|  |         # Note that the last file in a table may be only partially | ||||||
|  |         # full (smaller than self.rows_per_file).  We purposely leave | ||||||
|  |         # those files around rather than deleting them, because the | ||||||
|  |         # remainder will be filled on a subsequent append(), and things | ||||||
|  |         # are generally easier if we don't have to special-case that. | ||||||
|  |         if (len(merged) == 1 and | ||||||
|  |                 merged[0][0] == 0 and merged[0][1] == self.rows_per_file): | ||||||
|  |             # Delete files | ||||||
|  |             if cachefile_present: | ||||||
|  |                 os.remove(cachefile) | ||||||
|  |             self._remove_or_truncate_file(subdir, filename, 0) | ||||||
|  |         else: | ||||||
|  |             # File needs to stick around.  This means we can get | ||||||
|  |             # degenerate cases where we have large files containing as | ||||||
|  |             # little as one row.  Try to punch a hole in the file, | ||||||
|  |             # so that this region doesn't take up filesystem space. | ||||||
|  |             offset = start * self.row_size | ||||||
|  |             count = (stop - start) * self.row_size | ||||||
|  |             nilmdb.utils.fallocate.punch_hole(datafile, offset, count) | ||||||
|  |  | ||||||
|  |             # Update cache.  Try to do it atomically. | ||||||
|  |             nilmdb.utils.atomic.replace_file(cachefile, | ||||||
|  |                                              pickle.dumps(merged, 2)) | ||||||
|  |  | ||||||
|  |     def remove(self, start, stop): | ||||||
|  |         """Remove specified rows [start, stop) from this table. | ||||||
|  |  | ||||||
|  |         If a file is left empty, it is fully removed.  Otherwise, a | ||||||
|  |         parallel data file is used to remember which rows have been | ||||||
|  |         removed, and the file is otherwise untouched.""" | ||||||
|  |         if start < 0 or start > stop or stop > self.nrows: | ||||||
|  |             raise IndexError("Index out of range") | ||||||
|  |  | ||||||
|  |         row = start | ||||||
|  |         remaining = stop - start | ||||||
|  |         while remaining: | ||||||
|  |             # Loop through each file that we need to touch | ||||||
|  |             (subdir, filename, offset, count) = self._offset_from_row(row) | ||||||
|  |             if count > remaining: | ||||||
|  |                 count = remaining | ||||||
|  |             row_offset = offset // self.row_size | ||||||
|  |             # Mark the rows as being removed | ||||||
|  |             self._remove_rows(subdir, filename, row_offset, row_offset + count) | ||||||
|  |             remaining -= count | ||||||
|  |             row += count | ||||||
							
								
								
									
										15
									
								
								nilmdb/server/errors.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								nilmdb/server/errors.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,15 @@ | |||||||
|  | """Exceptions""" | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class NilmDBError(Exception): | ||||||
|  |     """Base exception for NilmDB errors""" | ||||||
|  |     def __init__(self, msg="Unspecified error"): | ||||||
|  |         super().__init__(msg) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class StreamError(NilmDBError): | ||||||
|  |     pass | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class OverlapError(NilmDBError): | ||||||
|  |     pass | ||||||
							
								
								
									
										329
									
								
								nilmdb/server/interval.pyx
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										329
									
								
								nilmdb/server/interval.pyx
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,329 @@ | |||||||
|  | # cython: language_level=2 | ||||||
|  |  | ||||||
|  | """Interval, IntervalSet | ||||||
|  |  | ||||||
|  | The Interval implemented here is just like | ||||||
|  | nilmdb.utils.interval.Interval, except implemented in Cython for | ||||||
|  | speed. | ||||||
|  |  | ||||||
|  | Represents an interval of time, and a set of such intervals. | ||||||
|  |  | ||||||
|  | Intervals are half-open, i.e. they include data points with timestamps | ||||||
|  | [start, end) | ||||||
|  | """ | ||||||
|  |  | ||||||
|  | # First implementation kept a sorted list of intervals and used | ||||||
|  | # bisect() to optimize some operations, but this was too slow. | ||||||
|  |  | ||||||
|  | # Second version was based on the quicksect implementation from | ||||||
|  | # python-bx, modified slightly to handle floating point intervals. | ||||||
|  | # This didn't support deletion. | ||||||
|  |  | ||||||
|  | # Third version is more similar to the first version, using a rb-tree | ||||||
|  | # instead of a simple sorted list to maintain O(log n) operations. | ||||||
|  |  | ||||||
|  | # Fourth version is an optimized rb-tree that stores interval starts | ||||||
|  | # and ends directly in the tree, like bxinterval did. | ||||||
|  |  | ||||||
|  | from ..utils.time import min_timestamp as nilmdb_min_timestamp | ||||||
|  | from ..utils.time import max_timestamp as nilmdb_max_timestamp | ||||||
|  | from ..utils.time import timestamp_to_string | ||||||
|  | from ..utils.iterator import imerge | ||||||
|  | from ..utils.interval import IntervalError | ||||||
|  | import itertools | ||||||
|  |  | ||||||
|  | cimport rbtree | ||||||
|  | from libc.stdint cimport uint64_t, int64_t | ||||||
|  |  | ||||||
|  | ctypedef int64_t timestamp_t | ||||||
|  |  | ||||||
|  | cdef class Interval: | ||||||
|  |     """Represents an interval of time.""" | ||||||
|  |  | ||||||
|  |     cdef public timestamp_t start, end | ||||||
|  |  | ||||||
|  |     def __init__(self, timestamp_t start, timestamp_t end): | ||||||
|  |         """ | ||||||
|  |         'start' and 'end' are arbitrary numbers that represent time | ||||||
|  |         """ | ||||||
|  |         if start >= end: | ||||||
|  |             # Explicitly disallow zero-width intervals (since they're half-open) | ||||||
|  |             raise IntervalError("start %s must precede end %s" % (start, end)) | ||||||
|  |         self.start = start | ||||||
|  |         self.end = end | ||||||
|  |  | ||||||
|  |     def __repr__(self): | ||||||
|  |         s = repr(self.start) + ", " + repr(self.end) | ||||||
|  |         return self.__class__.__name__ + "(" + s + ")" | ||||||
|  |  | ||||||
|  |     def __str__(self): | ||||||
|  |         return ("[" + timestamp_to_string(self.start) + | ||||||
|  |                 " -> " + timestamp_to_string(self.end) + ")") | ||||||
|  |  | ||||||
|  |     # Compare two intervals.  If non-equal, order by start then end | ||||||
|  |     def __lt__(self, Interval other): | ||||||
|  |         return (self.start, self.end) < (other.start, other.end) | ||||||
|  |     def __gt__(self, Interval other): | ||||||
|  |         return (self.start, self.end) > (other.start, other.end) | ||||||
|  |     def __le__(self, Interval other): | ||||||
|  |         return (self.start, self.end) <= (other.start, other.end) | ||||||
|  |     def __ge__(self, Interval other): | ||||||
|  |         return (self.start, self.end) >= (other.start, other.end) | ||||||
|  |     def __eq__(self, Interval other): | ||||||
|  |         return (self.start, self.end) == (other.start, other.end) | ||||||
|  |     def __ne__(self, Interval other): | ||||||
|  |         return (self.start, self.end) != (other.start, other.end) | ||||||
|  |  | ||||||
|  |     cpdef intersects(self, Interval other): | ||||||
|  |         """Return True if two Interval objects intersect""" | ||||||
|  |         if (self.end <= other.start or self.start >= other.end): | ||||||
|  |             return False | ||||||
|  |         return True | ||||||
|  |  | ||||||
|  |     cpdef subset(self, timestamp_t start, timestamp_t end): | ||||||
|  |         """Return a new Interval that is a subset of this one""" | ||||||
|  |         # A subclass that tracks additional data might override this. | ||||||
|  |         if start < self.start or end > self.end: | ||||||
|  |             raise IntervalError("not a subset") | ||||||
|  |         return Interval(start, end) | ||||||
|  |  | ||||||
|  | cdef class DBInterval(Interval): | ||||||
|  |     """ | ||||||
|  |     Like Interval, but also tracks corresponding start/end times and | ||||||
|  |     positions within the database.  These are not currently modified | ||||||
|  |     when subsets are taken, but can be used later to help zero in on | ||||||
|  |     database positions. | ||||||
|  |  | ||||||
|  |     The actual 'start' and 'end' will always fall within the database | ||||||
|  |     start and end, e.g.: | ||||||
|  |         db_start = 100, db_startpos = 10000 | ||||||
|  |         start = 123 | ||||||
|  |         end = 150 | ||||||
|  |         db_end = 200, db_endpos = 20000 | ||||||
|  |     """ | ||||||
|  |  | ||||||
|  |     cpdef public timestamp_t db_start, db_end | ||||||
|  |     cpdef public uint64_t db_startpos, db_endpos | ||||||
|  |  | ||||||
|  |     def __init__(self, start, end, | ||||||
|  |                  db_start, db_end, | ||||||
|  |                  db_startpos, db_endpos): | ||||||
|  |         """ | ||||||
|  |         'db_start' and 'db_end' are arbitrary numbers that represent | ||||||
|  |         time.  They must span (be a superset of) the time interval | ||||||
|  |         covered by 'start' and 'end'.  The 'db_startpos' and | ||||||
|  |         'db_endpos' are arbitrary database position indicators that | ||||||
|  |         correspond to those points. | ||||||
|  |         """ | ||||||
|  |         Interval.__init__(self, start, end) | ||||||
|  |         self.db_start = db_start | ||||||
|  |         self.db_end = db_end | ||||||
|  |         self.db_startpos = db_startpos | ||||||
|  |         self.db_endpos = db_endpos | ||||||
|  |         if db_start > start or db_end < end: | ||||||
|  |             raise IntervalError("database times must span the interval times") | ||||||
|  |  | ||||||
|  |     def __repr__(self): | ||||||
|  |         s = repr(self.start) + ", " + repr(self.end) | ||||||
|  |         s += ", " + repr(self.db_start) + ", " + repr(self.db_end) | ||||||
|  |         s += ", " + repr(self.db_startpos) + ", " + repr(self.db_endpos) | ||||||
|  |         return self.__class__.__name__ + "(" + s + ")" | ||||||
|  |  | ||||||
|  |     cpdef subset(self, timestamp_t start, timestamp_t end): | ||||||
|  |         """ | ||||||
|  |         Return a new DBInterval that is a subset of this one | ||||||
|  |         """ | ||||||
|  |         if start < self.start or end > self.end: | ||||||
|  |             raise IntervalError("not a subset") | ||||||
|  |         return DBInterval(start, end, | ||||||
|  |                           self.db_start, self.db_end, | ||||||
|  |                           self.db_startpos, self.db_endpos) | ||||||
|  |  | ||||||
|  | cdef class IntervalSet: | ||||||
|  |     """ | ||||||
|  |     A non-intersecting set of intervals. | ||||||
|  |     """ | ||||||
|  |  | ||||||
|  |     cdef public rbtree.RBTree tree | ||||||
|  |  | ||||||
|  |     def __init__(self, source=None): | ||||||
|  |         """ | ||||||
|  |         'source' is an Interval or IntervalSet to add. | ||||||
|  |         """ | ||||||
|  |         self.tree = rbtree.RBTree() | ||||||
|  |         if source is not None: | ||||||
|  |             self += source | ||||||
|  |  | ||||||
|  |     def __iter__(self): | ||||||
|  |         for node in self.tree: | ||||||
|  |             if node.obj: | ||||||
|  |                 yield node.obj | ||||||
|  |  | ||||||
|  |     def __len__(self): | ||||||
|  |         return sum(1 for x in self) | ||||||
|  |  | ||||||
|  |     def __repr__(self): | ||||||
|  |         descs = [ repr(x) for x in self ] | ||||||
|  |         return self.__class__.__name__ + "([" + ", ".join(descs) + "])" | ||||||
|  |  | ||||||
|  |     def __str__(self): | ||||||
|  |         descs = [ str(x) for x in self ] | ||||||
|  |         return  "[" + ", ".join(descs) + "]" | ||||||
|  |  | ||||||
|  |     def __match__(self, other): | ||||||
|  |         # This isn't particularly efficient, but it shouldn't get used in the | ||||||
|  |         # general case. | ||||||
|  |         """Test equality of two IntervalSets. | ||||||
|  |  | ||||||
|  |         Treats adjacent Intervals as equivalent to one long interval, | ||||||
|  |         so this function really tests whether the IntervalSets cover | ||||||
|  |         the same spans of time.""" | ||||||
|  |         i = 0 | ||||||
|  |         j = 0 | ||||||
|  |         outside = True | ||||||
|  |  | ||||||
|  |         def is_adjacent(a, b): | ||||||
|  |             """Return True if two Intervals are adjacent (same end or start)""" | ||||||
|  |             if a.end == b.start or b.end == a.start: | ||||||
|  |                 return True | ||||||
|  |             else: | ||||||
|  |                 return False | ||||||
|  |  | ||||||
|  |         this = list(self) | ||||||
|  |         that = list(other) | ||||||
|  |  | ||||||
|  |         try: | ||||||
|  |             while True: | ||||||
|  |                 if (outside): | ||||||
|  |                     # To match, we need to be finished with both sets | ||||||
|  |                     if (i >= len(this) and j >= len(that)): | ||||||
|  |                         return True | ||||||
|  |                     # Or the starts need to match | ||||||
|  |                     if (this[i].start != that[j].start): | ||||||
|  |                         return False | ||||||
|  |                     outside = False | ||||||
|  |                 else: | ||||||
|  |                     # We can move on if the two interval ends match | ||||||
|  |                     if (this[i].end == that[j].end): | ||||||
|  |                         i += 1 | ||||||
|  |                         j += 1 | ||||||
|  |                         outside = True | ||||||
|  |                     else: | ||||||
|  |                         # Whichever ends first needs to be adjacent to the next | ||||||
|  |                         if (this[i].end < that[j].end): | ||||||
|  |                             if (not is_adjacent(this[i],this[i+1])): | ||||||
|  |                                 return False | ||||||
|  |                             i += 1 | ||||||
|  |                         else: | ||||||
|  |                             if (not is_adjacent(that[j],that[j+1])): | ||||||
|  |                                 return False | ||||||
|  |                             j += 1 | ||||||
|  |         except IndexError: | ||||||
|  |             return False | ||||||
|  |  | ||||||
|  |     # Use __richcmp__ instead of __eq__, __ne__ for Cython. | ||||||
|  |     def __richcmp__(self, other, int op): | ||||||
|  |         if op == 2: # == | ||||||
|  |             return self.__match__(other) | ||||||
|  |         elif op == 3: # != | ||||||
|  |             return not self.__match__(other) | ||||||
|  |         return False | ||||||
|  |     #def __eq__(self, other): | ||||||
|  |     #    return self.__match__(other) | ||||||
|  |     # | ||||||
|  |     #def __ne__(self, other): | ||||||
|  |     #    return not self.__match__(other) | ||||||
|  |  | ||||||
|  |     def __iadd__(self, object other not None): | ||||||
|  |         """Inplace add -- modifies self | ||||||
|  |  | ||||||
|  |         This throws an exception if the regions being added intersect.""" | ||||||
|  |         if isinstance(other, Interval): | ||||||
|  |             if self.intersects(other): | ||||||
|  |                 raise IntervalError("Tried to add overlapping interval " | ||||||
|  |                                     "to this set") | ||||||
|  |             self.tree.insert(rbtree.RBNode(other.start, other.end, other)) | ||||||
|  |         else: | ||||||
|  |             for x in other: | ||||||
|  |                 self.__iadd__(x) | ||||||
|  |         return self | ||||||
|  |  | ||||||
|  |     def iadd_nocheck(self, Interval other not None): | ||||||
|  |         """Inplace add -- modifies self. | ||||||
|  |         'Optimized' version that doesn't check for intersection and | ||||||
|  |         only inserts the new interval into the tree.""" | ||||||
|  |         self.tree.insert(rbtree.RBNode(other.start, other.end, other)) | ||||||
|  |  | ||||||
|  |     def __isub__(self, Interval other not None): | ||||||
|  |         """Inplace subtract -- modifies self | ||||||
|  |  | ||||||
|  |         Looks up the tree node with exactly other.start and other.end | ||||||
|  |         and deletes it.  A piece of a stored interval cannot be removed | ||||||
|  |         this way; IntervalError is raised when no such node is found.""" | ||||||
|  |         node = self.tree.find(other.start, other.end) | ||||||
|  |         if node is not None: | ||||||
|  |             self.tree.delete(node) | ||||||
|  |             return self | ||||||
|  |         raise IntervalError("interval " + str(other) + " not in tree") | ||||||
|  |  | ||||||
|  |     def __add__(self, other not None): | ||||||
|  |         """Add -- builds and returns a new IntervalSet. | ||||||
|  |  | ||||||
|  |         A copy is made from self, then an IntervalSet made from `other` | ||||||
|  |         is added to that copy in place.""" | ||||||
|  |         combined = IntervalSet(self) | ||||||
|  |         addend = IntervalSet(other) | ||||||
|  |         combined += addend | ||||||
|  |         return combined | ||||||
|  |  | ||||||
|  |     def __and__(self, other not None): | ||||||
|  |         """ | ||||||
|  |         Intersect this IntervalSet with one other interval and return | ||||||
|  |         the result as a new IntervalSet. | ||||||
|  |  | ||||||
|  |         The pieces come from self.intersection(other), so they are | ||||||
|  |         subsets of the intervals held by self.  Each piece is inserted | ||||||
|  |         straight into the new set's tree. | ||||||
|  |         """ | ||||||
|  |         result = IntervalSet() | ||||||
|  |         for piece in self.intersection(other): | ||||||
|  |             node = rbtree.RBNode(piece.start, piece.end, piece) | ||||||
|  |             result.tree.insert(node) | ||||||
|  |         return result | ||||||
|  |  | ||||||
|  |     def intersection(self, Interval interval not None, orig = False): | ||||||
|  |         """ | ||||||
|  |         Generator over the pieces of this set that overlap `interval`. | ||||||
|  |  | ||||||
|  |         Every node returned by self.tree.intersect() for interval.start | ||||||
|  |         and interval.end contributes one piece, made by calling | ||||||
|  |         .subset() on the stored interval with its bounds clipped to | ||||||
|  |         those of `interval`.  Pieces are therefore subsets of the | ||||||
|  |         intervals in the first argument (self). | ||||||
|  |  | ||||||
|  |         With orig = False each piece is yielded on its own.  With | ||||||
|  |         orig = True a (piece, stored_interval) tuple is yielded, where | ||||||
|  |         stored_interval is the one the piece was cut from. | ||||||
|  |         """ | ||||||
|  |         for node in self.tree.intersect(interval.start, interval.end): | ||||||
|  |             stored = node.obj | ||||||
|  |             clipped_start = max(stored.start, interval.start) | ||||||
|  |             clipped_end = min(stored.end, interval.end) | ||||||
|  |             piece = stored.subset(clipped_start, clipped_end) | ||||||
|  |             if orig: | ||||||
|  |                 yield (piece, stored) | ||||||
|  |             else: | ||||||
|  |                 yield piece | ||||||
|  |  | ||||||
|  |     cpdef intersects(self, Interval other): | ||||||
|  |         """Return True if this IntervalSet intersects another interval""" | ||||||
|  |         # Candidates come from the tree's intersect() query; each one is | ||||||
|  |         # then confirmed with the stored interval's own intersects() test. | ||||||
|  |         for n in self.tree.intersect(other.start, other.end): | ||||||
|  |             if n.obj.intersects(other): | ||||||
|  |                 return True | ||||||
|  |         # No candidate was confirmed (or there were none at all) | ||||||
|  |         return False | ||||||
|  |  | ||||||
|  |     def find_end(self, timestamp_t t): | ||||||
|  |         """ | ||||||
|  |         Look up the Interval in this tree whose end is exactly t. | ||||||
|  |  | ||||||
|  |         Asks the tree for find_left_end(t) and returns that node's | ||||||
|  |         interval only when its end equals t; otherwise returns None. | ||||||
|  |         """ | ||||||
|  |         node = self.tree.find_left_end(t) | ||||||
|  |         if not node: | ||||||
|  |             return None | ||||||
|  |         if node.obj.end == t: | ||||||
|  |             return node.obj | ||||||
|  |         return None | ||||||
							
								
								
									
										1
									
								
								nilmdb/server/interval.pyxdep
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								nilmdb/server/interval.pyxdep
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | |||||||
|  | rbtree.pxd | ||||||
							
								
								
									
										717
									
								
								nilmdb/server/nilmdb.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										717
									
								
								nilmdb/server/nilmdb.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,717 @@ | |||||||
|  | # -*- coding: utf-8 -*- | ||||||
|  |  | ||||||
|  | """NilmDB | ||||||
|  |  | ||||||
|  | Object that represents a NILM database file. | ||||||
|  |  | ||||||
|  | Manages both the SQL database and the table storage backend. | ||||||
|  | """ | ||||||
|  |  | ||||||
|  | import os | ||||||
|  | import errno | ||||||
|  | import sqlite3 | ||||||
|  |  | ||||||
|  | import nilmdb.utils | ||||||
|  | from nilmdb.utils.printf import printf | ||||||
|  | from nilmdb.utils.time import timestamp_to_bytes | ||||||
|  |  | ||||||
|  | from nilmdb.utils.interval import IntervalError | ||||||
|  | from nilmdb.server.interval import Interval, DBInterval, IntervalSet | ||||||
|  |  | ||||||
|  | from nilmdb.server import bulkdata | ||||||
|  | from nilmdb.server.errors import NilmDBError, StreamError, OverlapError | ||||||
|  |  | ||||||
|  | # Note about performance and transactions: | ||||||
|  | # | ||||||
|  | # Committing a transaction in the default sync mode (PRAGMA synchronous=FULL) | ||||||
|  | # takes about 125msec.  sqlite3 will commit transactions at 3 times: | ||||||
|  | # 1: explicit con.commit() | ||||||
|  | # 2: between a series of DML commands and non-DML commands, e.g. | ||||||
|  | #    after a series of INSERT, SELECT, but before a CREATE TABLE or PRAGMA. | ||||||
|  | # 3: at the end of an explicit transaction, e.g. "with self.con as con:" | ||||||
|  | # | ||||||
|  | # To speed things up, we can set 'PRAGMA synchronous=OFF'.  Or, it | ||||||
|  | # seems that 'PRAGMA synchronous=NORMAL' and 'PRAGMA journal_mode=WAL' | ||||||
|  | # give an equivalent speedup more safely.  That is what is used here. | ||||||
|  | # | ||||||
|  | # Schema migration table, keyed by the database's "PRAGMA user_version". | ||||||
|  | # Each entry is one of: | ||||||
|  | #   {"next": N, "sql": script}  -- run script, then continue from version N | ||||||
|  | #   {"next": None}              -- current version; nothing left to do | ||||||
|  | #   {"error": message}          -- version that cannot be opened | ||||||
|  | # It is walked by NilmDB._sql_schema_update(). | ||||||
|  | _sql_schema_updates = { | ||||||
|  |     0: {"next": 1, "sql": """ | ||||||
|  |     -- All streams | ||||||
|  |     CREATE TABLE streams( | ||||||
|  |         id INTEGER PRIMARY KEY,		-- stream ID | ||||||
|  |         path TEXT UNIQUE NOT NULL,	-- path, e.g. '/newton/prep' | ||||||
|  |         layout TEXT NOT NULL		-- layout name, e.g. float32_8 | ||||||
|  |     ); | ||||||
|  |  | ||||||
|  |     -- Individual timestamped ranges in those streams. | ||||||
|  |     -- For a given start_time and end_time, this tells us that the | ||||||
|  |     -- data is stored between start_pos and end_pos. | ||||||
|  |     -- Times are stored as μs since Unix epoch | ||||||
|  |     -- Positions are opaque: PyTables rows, file offsets, etc. | ||||||
|  |     -- | ||||||
|  |     -- Note: end_pos points to the row _after_ end_time, so end_pos-1 | ||||||
|  |     -- is the last valid row. | ||||||
|  |     CREATE TABLE ranges( | ||||||
|  |         stream_id INTEGER NOT NULL, | ||||||
|  |         start_time INTEGER NOT NULL, | ||||||
|  |         end_time INTEGER NOT NULL, | ||||||
|  |         start_pos INTEGER NOT NULL, | ||||||
|  |         end_pos INTEGER NOT NULL | ||||||
|  |     ); | ||||||
|  |     CREATE INDEX _ranges_index ON ranges (stream_id, start_time, end_time); | ||||||
|  |     """}, | ||||||
|  |  | ||||||
|  |     # Version 1 upgrades straight to 3; version 2 is rejected below. | ||||||
|  |     1: {"next": 3, "sql": """ | ||||||
|  |     -- Generic dictionary-type metadata that can be associated with a stream | ||||||
|  |     CREATE TABLE metadata( | ||||||
|  |         stream_id INTEGER NOT NULL, | ||||||
|  |         key TEXT NOT NULL, | ||||||
|  |         value TEXT | ||||||
|  |     ); | ||||||
|  |     """}, | ||||||
|  |  | ||||||
|  |     # Databases found at version 2 cannot be opened by this code. | ||||||
|  |     2: {"error": "old format with floating-point timestamps requires " | ||||||
|  |         "nilmdb 1.3.1 or older"}, | ||||||
|  |  | ||||||
|  |     # Terminal entry: the schema version this code works with. | ||||||
|  |     3: {"next": None}, | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | @nilmdb.utils.must_close() | ||||||
|  | class NilmDB(): | ||||||
|  |     verbose = 0 | ||||||
|  |  | ||||||
|  |     def __init__(self, basepath, | ||||||
|  |                  max_results=None, | ||||||
|  |                  max_removals=None, | ||||||
|  |                  max_int_removals=None, | ||||||
|  |                  bulkdata_args=None): | ||||||
|  |         """Initialize NilmDB at the given basepath. | ||||||
|  |         Other arguments are for debugging / testing: | ||||||
|  |  | ||||||
|  |         'max_results' is the max rows to send in a single | ||||||
|  |         stream_intervals or stream_extract response. | ||||||
|  |  | ||||||
|  |         'max_removals' is the max rows to delete at once | ||||||
|  |         in stream_remove. | ||||||
|  |  | ||||||
|  |         'max_int_removals' is the max intervals to delete | ||||||
|  |         at once in stream_remove. | ||||||
|  |  | ||||||
|  |         'bulkdata_args' is kwargs for the bulkdata module. | ||||||
|  |         """ | ||||||
|  |         if bulkdata_args is None: | ||||||
|  |             bulkdata_args = {} | ||||||
|  |  | ||||||
|  |         # set up path | ||||||
|  |         self.basepath = os.path.abspath(basepath) | ||||||
|  |  | ||||||
|  |         # Create the database path if it doesn't exist | ||||||
|  |         try: | ||||||
|  |             os.makedirs(self.basepath) | ||||||
|  |         except OSError as e: | ||||||
|  |             if e.errno != errno.EEXIST: | ||||||
|  |                 raise IOError("can't create tree " + self.basepath) | ||||||
|  |  | ||||||
|  |         # Our data goes inside it | ||||||
|  |         self.data = bulkdata.BulkData(self.basepath, **bulkdata_args) | ||||||
|  |  | ||||||
|  |         # SQLite database too | ||||||
|  |         sqlfilename = os.path.join(self.basepath, "data.sql") | ||||||
|  |         self.con = sqlite3.connect(sqlfilename, check_same_thread=True) | ||||||
|  |         try: | ||||||
|  |             self._sql_schema_update() | ||||||
|  |         except Exception: | ||||||
|  |             self.data.close() | ||||||
|  |             raise | ||||||
|  |  | ||||||
|  |         # See big comment at top about the performance implications of this | ||||||
|  |         self.con.execute("PRAGMA synchronous=NORMAL") | ||||||
|  |         self.con.execute("PRAGMA journal_mode=WAL") | ||||||
|  |  | ||||||
|  |         # Approximate largest number of elements that we want to send | ||||||
|  |         # in a single reply (for stream_intervals, stream_extract). | ||||||
|  |         self.max_results = max_results or 16384 | ||||||
|  |  | ||||||
|  |         # Remove up to this many rows per call to stream_remove. | ||||||
|  |         self.max_removals = max_removals or 1048576 | ||||||
|  |  | ||||||
|  |         # Remove up to this many intervals per call to stream_remove. | ||||||
|  |         self.max_int_removals = max_int_removals or 4096 | ||||||
|  |  | ||||||
|  |     def get_basepath(self): | ||||||
|  |         return self.basepath | ||||||
|  |  | ||||||
|  |     def close(self): | ||||||
|  |         if self.con: | ||||||
|  |             self.con.commit() | ||||||
|  |             self.con.close() | ||||||
|  |             self.con = None | ||||||
|  |         self.data.close() | ||||||
|  |  | ||||||
|  |     def _sql_schema_update(self): | ||||||
|  |         cur = self.con.cursor() | ||||||
|  |         version = cur.execute("PRAGMA user_version").fetchone()[0] | ||||||
|  |         oldversion = version | ||||||
|  |  | ||||||
|  |         while True: | ||||||
|  |             if version not in _sql_schema_updates: | ||||||
|  |                 raise Exception(self.basepath + ": unknown database version " | ||||||
|  |                                 + str(version)) | ||||||
|  |             update = _sql_schema_updates[version] | ||||||
|  |             if "error" in update: | ||||||
|  |                 raise Exception(self.basepath + ": can't use database version " | ||||||
|  |                                 + str(version) + ": " + update["error"]) | ||||||
|  |             if update["next"] is None: | ||||||
|  |                 break | ||||||
|  |             cur.executescript(update["sql"]) | ||||||
|  |             version = update["next"] | ||||||
|  |             if self.verbose: | ||||||
|  |                 printf("Database schema updated to %d\n", version) | ||||||
|  |  | ||||||
|  |         if version != oldversion: | ||||||
|  |             with self.con: | ||||||
|  |                 cur.execute("PRAGMA user_version = {v:d}".format(v=version)) | ||||||
|  |  | ||||||
|  |     def _check_user_times(self, start, end): | ||||||
|  |         if start is None: | ||||||
|  |             start = nilmdb.utils.time.min_timestamp | ||||||
|  |         if end is None: | ||||||
|  |             end = nilmdb.utils.time.max_timestamp | ||||||
|  |         if start >= end: | ||||||
|  |             raise NilmDBError("start must precede end") | ||||||
|  |         return (start, end) | ||||||
|  |  | ||||||
|  |     @nilmdb.utils.lru_cache(size=64) | ||||||
|  |     def _get_intervals(self, stream_id): | ||||||
|  |         """ | ||||||
|  |         Return a mutable IntervalSet corresponding to the given stream ID. | ||||||
|  |         """ | ||||||
|  |         iset = IntervalSet() | ||||||
|  |         result = self.con.execute("SELECT start_time, end_time, " | ||||||
|  |                                   "start_pos, end_pos " | ||||||
|  |                                   "FROM ranges " | ||||||
|  |                                   "WHERE stream_id=?", (stream_id,)) | ||||||
|  |         try: | ||||||
|  |             for (start_time, end_time, start_pos, end_pos) in result: | ||||||
|  |                 iset += DBInterval(start_time, end_time, | ||||||
|  |                                    start_time, end_time, | ||||||
|  |                                    start_pos, end_pos) | ||||||
|  |         except IntervalError: | ||||||
|  |             raise NilmDBError("unexpected overlap in ranges table!") | ||||||
|  |  | ||||||
|  |         return iset | ||||||
|  |  | ||||||
|  |     def _sql_interval_insert(self, id, start, end, start_pos, end_pos): | ||||||
|  |         """Helper that adds interval to the SQL database only""" | ||||||
|  |         self.con.execute("INSERT INTO ranges " | ||||||
|  |                          "(stream_id,start_time,end_time,start_pos,end_pos) " | ||||||
|  |                          "VALUES (?,?,?,?,?)", | ||||||
|  |                          (id, start, end, start_pos, end_pos)) | ||||||
|  |  | ||||||
|  |     def _sql_interval_delete(self, id, start, end, start_pos, end_pos): | ||||||
|  |         """Helper that removes interval from the SQL database only""" | ||||||
|  |         self.con.execute("DELETE FROM ranges WHERE " | ||||||
|  |                          "stream_id=? AND start_time=? AND " | ||||||
|  |                          "end_time=? AND start_pos=? AND end_pos=?", | ||||||
|  |                          (id, start, end, start_pos, end_pos)) | ||||||
|  |  | ||||||
|  |     def _add_interval(self, stream_id, interval, start_pos, end_pos): | ||||||
|  |         """ | ||||||
|  |         Add interval to the internal interval cache, and to the database. | ||||||
|  |         Note: arguments must be ints (not numpy.int64, etc) | ||||||
|  |  | ||||||
|  |         stream_id: id of stream | ||||||
|  |          interval: time range being added; its .start is overwritten | ||||||
|  |                    when the new range is merged with the one before it | ||||||
|  |         start_pos: first database position of the new data | ||||||
|  |           end_pos: database position where the new data ends | ||||||
|  |  | ||||||
|  |         No overlap check is made here (iadd_nocheck is used).  The SQL | ||||||
|  |         changes are committed before returning. | ||||||
|  |         """ | ||||||
|  |         # Load this stream's intervals | ||||||
|  |         iset = self._get_intervals(stream_id) | ||||||
|  |  | ||||||
|  |         # Check for adjacency.  If there's an interval in the database | ||||||
|  |         # that ends exactly when this one starts, and the database | ||||||
|  |         # rows match up, we can make one interval that covers the | ||||||
|  |         # time range [adjacent.start -> interval.end) | ||||||
|  |         # and database rows [ adjacent.start_pos -> end_pos ]. | ||||||
|  |         # Only do this if the resulting interval isn't too large. | ||||||
|  |         max_merged_rows = 8000 * 60 * 60 * 1.05  # 1.05 hours at 8 KHz | ||||||
|  |         adjacent = iset.find_end(interval.start) | ||||||
|  |         if (adjacent is not None and | ||||||
|  |                 start_pos == adjacent.db_endpos and | ||||||
|  |                 (end_pos - adjacent.db_startpos) < max_merged_rows): | ||||||
|  |             # First delete the old one, both from our iset and the | ||||||
|  |             # database | ||||||
|  |             iset -= adjacent | ||||||
|  |             self._sql_interval_delete(stream_id, | ||||||
|  |                                       adjacent.db_start, adjacent.db_end, | ||||||
|  |                                       adjacent.db_startpos, adjacent.db_endpos) | ||||||
|  |  | ||||||
|  |             # Now update our interval so the fallthrough add is | ||||||
|  |             # correct.  This changes the caller's interval object. | ||||||
|  |             interval.start = adjacent.start | ||||||
|  |             start_pos = adjacent.db_startpos | ||||||
|  |  | ||||||
|  |         # Add the new (possibly merged) interval to the iset | ||||||
|  |         iset.iadd_nocheck(DBInterval(interval.start, interval.end, | ||||||
|  |                                      interval.start, interval.end, | ||||||
|  |                                      start_pos, end_pos)) | ||||||
|  |  | ||||||
|  |         # Insert into the database | ||||||
|  |         self._sql_interval_insert(stream_id, interval.start, interval.end, | ||||||
|  |                                   int(start_pos), int(end_pos)) | ||||||
|  |  | ||||||
|  |         # Make the delete (if any) and the insert permanent together | ||||||
|  |         self.con.commit() | ||||||
|  |  | ||||||
|  |     def _remove_interval(self, stream_id, original, remove): | ||||||
|  |         """ | ||||||
|  |         Remove an interval from the internal cache and the database. | ||||||
|  |  | ||||||
|  |         stream_id: id of stream | ||||||
|  |          original: original DBInterval; must be already present in DB | ||||||
|  |            remove: DBInterval to remove; must be subset of 'original' | ||||||
|  |  | ||||||
|  |         'original' is deleted as a whole, and whatever part of it lies | ||||||
|  |         before or after 'remove' is added back as a new interval.  The | ||||||
|  |         SQL changes are committed before returning. | ||||||
|  |         """ | ||||||
|  |         # Load this stream's intervals | ||||||
|  |         iset = self._get_intervals(stream_id) | ||||||
|  |  | ||||||
|  |         # Remove existing interval from the cached set and the database | ||||||
|  |         iset -= original | ||||||
|  |         self._sql_interval_delete(stream_id, | ||||||
|  |                                   original.db_start, original.db_end, | ||||||
|  |                                   original.db_startpos, original.db_endpos) | ||||||
|  |  | ||||||
|  |         # Add back the intervals that would be left over if the | ||||||
|  |         # requested interval is removed.  There may be two of them, if | ||||||
|  |         # the removed piece was in the middle. | ||||||
|  |         def add(iset, start, end, start_pos, end_pos): | ||||||
|  |             # Keeps the cached set and the SQL table in step: the same | ||||||
|  |             # times and positions go to both. | ||||||
|  |             iset += DBInterval(start, end, start, end, start_pos, end_pos) | ||||||
|  |             self._sql_interval_insert(stream_id, start, end, | ||||||
|  |                                       start_pos, end_pos) | ||||||
|  |  | ||||||
|  |         if original.start != remove.start: | ||||||
|  |             # Interval before the removed region | ||||||
|  |             add(iset, original.start, remove.start, | ||||||
|  |                 original.db_startpos, remove.db_startpos) | ||||||
|  |  | ||||||
|  |         if original.end != remove.end: | ||||||
|  |             # Interval after the removed region | ||||||
|  |             add(iset, remove.end, original.end, | ||||||
|  |                 remove.db_endpos, original.db_endpos) | ||||||
|  |  | ||||||
|  |         # Commit SQL changes | ||||||
|  |         self.con.commit() | ||||||
|  |  | ||||||
|  |         return | ||||||
|  |  | ||||||
|  |     def stream_list(self, path=None, layout=None, extended=False): | ||||||
|  |         """Return list of lists of all streams in the database. | ||||||
|  |  | ||||||
|  |         If path is specified, include only streams with a path that | ||||||
|  |         matches the given string. | ||||||
|  |  | ||||||
|  |         If layout is specified, include only streams with a layout | ||||||
|  |         that matches the given string. | ||||||
|  |  | ||||||
|  |         If extended=False, returns a list of lists containing | ||||||
|  |         the path and layout: [ path, layout ] | ||||||
|  |  | ||||||
|  |         If extended=True, returns a list of lists containing | ||||||
|  |         more information: | ||||||
|  |            path | ||||||
|  |            layout | ||||||
|  |            interval_min (earliest interval start) | ||||||
|  |            interval_max (latest interval end) | ||||||
|  |            rows         (total number of rows of data) | ||||||
|  |            time         (total time covered by this stream, in timestamp units) | ||||||
|  |         """ | ||||||
|  |         params = () | ||||||
|  |         query = "SELECT streams.path, streams.layout" | ||||||
|  |         if extended: | ||||||
|  |             query += ", min(ranges.start_time), max(ranges.end_time) " | ||||||
|  |             query += ", coalesce(sum(ranges.end_pos - ranges.start_pos), 0) " | ||||||
|  |             query += ", coalesce(sum(ranges.end_time - ranges.start_time), 0) " | ||||||
|  |         query += " FROM streams" | ||||||
|  |         if extended: | ||||||
|  |             query += " LEFT JOIN ranges ON streams.id = ranges.stream_id" | ||||||
|  |         query += " WHERE 1=1" | ||||||
|  |         if layout is not None: | ||||||
|  |             query += " AND streams.layout=?" | ||||||
|  |             params += (layout,) | ||||||
|  |         if path is not None: | ||||||
|  |             query += " AND streams.path=?" | ||||||
|  |             params += (path,) | ||||||
|  |         query += " GROUP BY streams.id ORDER BY streams.path" | ||||||
|  |         result = self.con.execute(query, params).fetchall() | ||||||
|  |         return [list(x) for x in result] | ||||||
|  |  | ||||||
|  |     def stream_intervals(self, path, start=None, end=None, diffpath=None): | ||||||
|  |         """ | ||||||
|  |         List all intervals in 'path' between 'start' and 'end'.  If | ||||||
|  |         'diffpath' is not none, list instead the set-difference | ||||||
|  |         between the intervals in the two streams; i.e. all interval | ||||||
|  |         ranges that are present in 'path' but not 'diffpath'. | ||||||
|  |  | ||||||
|  |         Returns (intervals, restart) tuple. | ||||||
|  |  | ||||||
|  |         'intervals' is a list of [start,end] timestamps of all intervals | ||||||
|  |         that exist for path, between start and end. | ||||||
|  |  | ||||||
|  |         'restart', if not None, means that there were too many results | ||||||
|  |         to return in a single request.  The data is complete from the | ||||||
|  |         starting timestamp to the point at which it was truncated, and | ||||||
|  |         a new request with a start time of 'restart' will fetch the | ||||||
|  |         next block of data. | ||||||
|  |         """ | ||||||
|  |         stream_id = self._stream_id(path) | ||||||
|  |         intervals = self._get_intervals(stream_id) | ||||||
|  |         if diffpath: | ||||||
|  |             diffstream_id = self._stream_id(diffpath) | ||||||
|  |             diffintervals = self._get_intervals(diffstream_id) | ||||||
|  |         (start, end) = self._check_user_times(start, end) | ||||||
|  |         requested = Interval(start, end) | ||||||
|  |         result = [] | ||||||
|  |         if diffpath: | ||||||
|  |             getter = nilmdb.utils.interval.set_difference( | ||||||
|  |                 intervals.intersection(requested), | ||||||
|  |                 diffintervals.intersection(requested)) | ||||||
|  |         else: | ||||||
|  |             getter = intervals.intersection(requested) | ||||||
|  |         for n, i in enumerate(getter): | ||||||
|  |             if n >= self.max_results: | ||||||
|  |                 restart = i.start | ||||||
|  |                 break | ||||||
|  |             result.append([i.start, i.end]) | ||||||
|  |         else: | ||||||
|  |             restart = None | ||||||
|  |         return (result, restart) | ||||||
|  |  | ||||||
|  |     def stream_create(self, path, layout_name): | ||||||
|  |         """Create a new table in the database. | ||||||
|  |  | ||||||
|  |         path: path to the data (e.g. '/newton/prep'). | ||||||
|  |         Paths must contain at least two elements, e.g.: | ||||||
|  |            /newton/prep | ||||||
|  |            /newton/raw | ||||||
|  |            /newton/upstairs/prep | ||||||
|  |            /newton/upstairs/raw | ||||||
|  |  | ||||||
|  |         layout_name: string for nilmdb.layout.get_named(), e.g. 'float32_8' | ||||||
|  |         """ | ||||||
|  |         # Create the bulk storage.  Raises ValueError on error, which we | ||||||
|  |         # pass along. | ||||||
|  |         self.data.create(path, layout_name) | ||||||
|  |  | ||||||
|  |         # Insert into SQL database once the bulk storage is happy | ||||||
|  |         with self.con as con: | ||||||
|  |             con.execute("INSERT INTO streams (path, layout) VALUES (?,?)", | ||||||
|  |                         (path, layout_name)) | ||||||
|  |  | ||||||
|  |     def _stream_id(self, path): | ||||||
|  |         """Return unique stream ID""" | ||||||
|  |         result = self.con.execute("SELECT id FROM streams WHERE path=?", | ||||||
|  |                                   (path,)).fetchone() | ||||||
|  |         if result is None: | ||||||
|  |             raise StreamError("No stream at path " + path) | ||||||
|  |         return result[0] | ||||||
|  |  | ||||||
|  |     def stream_set_metadata(self, path, data): | ||||||
|  |         """Set stream metadata from a dictionary, e.g. | ||||||
|  |            { description: 'Downstairs lighting', | ||||||
|  |              v_scaling: 123.45 } | ||||||
|  |            This replaces all existing metadata. | ||||||
|  |            """ | ||||||
|  |         stream_id = self._stream_id(path) | ||||||
|  |         with self.con as con: | ||||||
|  |             con.execute("DELETE FROM metadata WHERE stream_id=?", (stream_id,)) | ||||||
|  |             for key in data: | ||||||
|  |                 if data[key] != '': | ||||||
|  |                     con.execute("INSERT INTO metadata VALUES (?, ?, ?)", | ||||||
|  |                                 (stream_id, key, data[key])) | ||||||
|  |  | ||||||
|  |     def stream_get_metadata(self, path): | ||||||
|  |         """Return stream metadata as a dictionary.""" | ||||||
|  |         stream_id = self._stream_id(path) | ||||||
|  |         result = self.con.execute("SELECT metadata.key, metadata.value " | ||||||
|  |                                   "FROM metadata " | ||||||
|  |                                   "WHERE metadata.stream_id=?", (stream_id,)) | ||||||
|  |         data = {} | ||||||
|  |         for (key, value) in result: | ||||||
|  |             data[key] = value | ||||||
|  |         return data | ||||||
|  |  | ||||||
|  |     def stream_update_metadata(self, path, newdata): | ||||||
|  |         """Update stream metadata from a dictionary""" | ||||||
|  |         data = self.stream_get_metadata(path) | ||||||
|  |         data.update(newdata) | ||||||
|  |         self.stream_set_metadata(path, data) | ||||||
|  |  | ||||||
|  |     def stream_rename(self, oldpath, newpath): | ||||||
|  |         """Rename a stream.""" | ||||||
|  |         stream_id = self._stream_id(oldpath) | ||||||
|  |  | ||||||
|  |         # Rename the data | ||||||
|  |         self.data.rename(oldpath, newpath) | ||||||
|  |  | ||||||
|  |         # Rename the stream in the database | ||||||
|  |         with self.con as con: | ||||||
|  |             con.execute("UPDATE streams SET path=? WHERE id=?", | ||||||
|  |                         (newpath, stream_id)) | ||||||
|  |  | ||||||
|  |     def stream_destroy(self, path): | ||||||
|  |         """Fully remove a table from the database.  Fails if there are | ||||||
|  |         any intervals data present; remove them first.  Metadata is | ||||||
|  |         also removed.""" | ||||||
|  |         stream_id = self._stream_id(path) | ||||||
|  |  | ||||||
|  |         # Verify that no intervals are present, and clear the cache | ||||||
|  |         iset = self._get_intervals(stream_id) | ||||||
|  |         if iset: | ||||||
|  |             raise NilmDBError("all intervals must be removed before " | ||||||
|  |                               "destroying a stream") | ||||||
|  |         self._get_intervals.cache_remove(self, stream_id) | ||||||
|  |  | ||||||
|  |         # Delete the bulkdata storage | ||||||
|  |         self.data.destroy(path) | ||||||
|  |  | ||||||
|  |         # Delete metadata, stream, intervals (should be none) | ||||||
|  |         with self.con as con: | ||||||
|  |             con.execute("DELETE FROM metadata WHERE stream_id=?", (stream_id,)) | ||||||
|  |             con.execute("DELETE FROM ranges WHERE stream_id=?", (stream_id,)) | ||||||
|  |             con.execute("DELETE FROM streams WHERE id=?", (stream_id,)) | ||||||
|  |  | ||||||
|  |     def stream_insert(self, path, start, end, data, binary=False): | ||||||
|  |         """Insert new data into the database. | ||||||
|  |            path: Path at which to add the data | ||||||
|  |            start: Starting timestamp | ||||||
|  |            end: Ending timestamp | ||||||
|  |            data: Textual data, formatted according to the layout of path | ||||||
|  |  | ||||||
|  |            'binary', if True, means that 'data' is raw binary: | ||||||
|  |            little-endian, matching the current table's layout, | ||||||
|  |            including the int64 timestamp. | ||||||
|  |  | ||||||
|  |            Raises OverlapError if [start, end) intersects an interval | ||||||
|  |            already recorded for this stream. | ||||||
|  |            """ | ||||||
|  |         # First check for basic overlap using timestamp info given. | ||||||
|  |         stream_id = self._stream_id(path) | ||||||
|  |         iset = self._get_intervals(stream_id) | ||||||
|  |         interval = Interval(start, end) | ||||||
|  |         if iset.intersects(interval): | ||||||
|  |             raise OverlapError("new data overlaps existing data at range: " | ||||||
|  |                                + str(iset & interval)) | ||||||
|  |  | ||||||
|  |         # Tentatively append the data.  This will raise a ValueError if | ||||||
|  |         # there are any parse errors. | ||||||
|  |         table = self.data.getnode(path) | ||||||
|  |         # The row count before and after the append gives the position | ||||||
|  |         # range that the new data occupies. | ||||||
|  |         row_start = table.nrows | ||||||
|  |         table.append_data(data, start, end, binary) | ||||||
|  |         row_end = table.nrows | ||||||
|  |  | ||||||
|  |         # Insert the record into the sql database. | ||||||
|  |         self._add_interval(stream_id, interval, row_start, row_end) | ||||||
|  |  | ||||||
|  |         # And that's all | ||||||
|  |         return | ||||||
|  |  | ||||||
|  |     def _bisect_left(self, a, x, lo, hi): | ||||||
|  |         # Like bisect.bisect_left, but doesn't choke on large indices on | ||||||
|  |         # 32-bit systems, like bisect's fast C implementation does. | ||||||
|  |         while lo < hi: | ||||||
|  |             mid = (lo + hi) // 2 | ||||||
|  |             if a[mid] < x: | ||||||
|  |                 lo = mid + 1 | ||||||
|  |             else: | ||||||
|  |                 hi = mid | ||||||
|  |         return lo | ||||||
|  |  | ||||||
|  |     def _find_start(self, table, dbinterval): | ||||||
|  |         """ | ||||||
|  |         Given a DBInterval, find the row in the database that | ||||||
|  |         corresponds to the start time.  Return the first database | ||||||
|  |         position with a timestamp (first element) greater than or | ||||||
|  |         equal to 'start'. | ||||||
|  |  | ||||||
|  |         The search is restricted to the row range | ||||||
|  |         [db_startpos, db_endpos) recorded in the DBInterval, so | ||||||
|  |         db_endpos is returned if no row in that range qualifies. | ||||||
|  |         """ | ||||||
|  |         # Optimization for the common case where an interval wasn't truncated | ||||||
|  |         if dbinterval.start == dbinterval.db_start: | ||||||
|  |             return dbinterval.db_startpos | ||||||
|  |         # Truncated interval: bisect the table rows for the new start time | ||||||
|  |         return self._bisect_left(table, | ||||||
|  |                                  dbinterval.start, | ||||||
|  |                                  dbinterval.db_startpos, | ||||||
|  |                                  dbinterval.db_endpos) | ||||||
|  |  | ||||||
|  |     def _find_end(self, table, dbinterval): | ||||||
|  |         """ | ||||||
|  |         Given a DBInterval, find the row in the database that follows | ||||||
|  |         the end time.  Return the first database position with a | ||||||
|  |         timestamp (first element) greater than or equal to 'end', | ||||||
|  |         which makes it usable as the exclusive end of a row slice. | ||||||
|  |  | ||||||
|  |         The search is restricted to the row range | ||||||
|  |         [db_startpos, db_endpos) recorded in the DBInterval, so | ||||||
|  |         db_endpos is returned if no row in that range qualifies. | ||||||
|  |         """ | ||||||
|  |         # Optimization for the common case where an interval wasn't truncated | ||||||
|  |         if dbinterval.end == dbinterval.db_end: | ||||||
|  |             return dbinterval.db_endpos | ||||||
|  |         # Note that we still use bisect_left here, because we don't | ||||||
|  |         # want to include the given timestamp in the results.  This is | ||||||
|  |         # so that queries like 1:00 -> 2:00 and 2:00 -> 3:00 return | ||||||
|  |         # non-overlapping data. | ||||||
|  |         return self._bisect_left(table, | ||||||
|  |                                  dbinterval.end, | ||||||
|  |                                  dbinterval.db_startpos, | ||||||
|  |                                  dbinterval.db_endpos) | ||||||
|  |  | ||||||
|  |     def stream_extract(self, path, start=None, end=None, | ||||||
|  |                        count=False, markup=False, binary=False): | ||||||
|  |         """ | ||||||
|  |         Returns (data, restart) tuple, or just the number of matching | ||||||
|  |         rows (a plain integer, not a tuple) if 'count' is true. | ||||||
|  |  | ||||||
|  |         'data' is ASCII-formatted data from the database, formatted | ||||||
|  |         according to the layout of the stream.  It is returned as a | ||||||
|  |         single bytes object. | ||||||
|  |  | ||||||
|  |         'restart', if not None, means that there were too many results to | ||||||
|  |         return in a single request.  The data is complete from the | ||||||
|  |         starting timestamp to the point at which it was truncated, | ||||||
|  |         and a new request with a start time of 'restart' will fetch | ||||||
|  |         the next block of data. | ||||||
|  |  | ||||||
|  |         'count', if true, means to not return raw data, but just the count | ||||||
|  |         of rows that would have been returned.  This is much faster | ||||||
|  |         than actually fetching the data.  It is not limited by | ||||||
|  |         max_results. | ||||||
|  |  | ||||||
|  |         'markup', if true, indicates that returned data should be | ||||||
|  |         marked with a comment denoting when a particular interval | ||||||
|  |         starts, and another comment when an interval ends. | ||||||
|  |  | ||||||
|  |         'binary', if true, means to return raw binary rather than | ||||||
|  |         ASCII-formatted data. | ||||||
|  |  | ||||||
|  |         Raises NilmDBError if 'binary' is combined with 'markup' or | ||||||
|  |         'count'. | ||||||
|  |         """ | ||||||
|  |         stream_id = self._stream_id(path) | ||||||
|  |         table = self.data.getnode(path) | ||||||
|  |         intervals = self._get_intervals(stream_id) | ||||||
|  |         (start, end) = self._check_user_times(start, end) | ||||||
|  |         requested = Interval(start, end) | ||||||
|  |         result = [] | ||||||
|  |         matched = 0 | ||||||
|  |         # Number of rows we may still return before truncating | ||||||
|  |         remaining = self.max_results | ||||||
|  |         restart = None | ||||||
|  |         if binary and (markup or count): | ||||||
|  |             raise NilmDBError("binary mode can't be used with markup or count") | ||||||
|  |         for interval in intervals.intersection(requested): | ||||||
|  |             # Reading single rows from the table is too slow, so | ||||||
|  |             # we use two bisections to find both the starting and | ||||||
|  |             # ending row for this particular interval, then | ||||||
|  |             # read the entire range as one slice. | ||||||
|  |             row_start = self._find_start(table, interval) | ||||||
|  |             row_end = self._find_end(table, interval) | ||||||
|  |  | ||||||
|  |             if count: | ||||||
|  |                 # Count-only mode never touches the data or 'remaining' | ||||||
|  |                 matched += row_end - row_start | ||||||
|  |                 continue | ||||||
|  |  | ||||||
|  |             # Shorten it if we'll hit the maximum number of results | ||||||
|  |             row_max = row_start + remaining | ||||||
|  |             if row_max < row_end: | ||||||
|  |                 row_end = row_max | ||||||
|  |                 # The first row we did not return is where the caller | ||||||
|  |                 # should resume. | ||||||
|  |                 restart = table[row_max] | ||||||
|  |  | ||||||
|  |             # Add markup | ||||||
|  |             if markup: | ||||||
|  |                 result.append(b"# interval-start " + | ||||||
|  |                               timestamp_to_bytes(interval.start) + b"\n") | ||||||
|  |  | ||||||
|  |             # Gather these results up | ||||||
|  |             result.append(table.get_data(row_start, row_end, binary)) | ||||||
|  |  | ||||||
|  |             # Count them | ||||||
|  |             remaining -= row_end - row_start | ||||||
|  |  | ||||||
|  |             # Add markup, and exit if restart is set. | ||||||
|  |             if restart is not None: | ||||||
|  |                 if markup: | ||||||
|  |                     # A truncated interval is reported as ending at the | ||||||
|  |                     # restart point rather than at its real end. | ||||||
|  |                     result.append(b"# interval-end " + | ||||||
|  |                                   timestamp_to_bytes(restart) + b"\n") | ||||||
|  |                 break | ||||||
|  |             if markup: | ||||||
|  |                 result.append(b"# interval-end " + | ||||||
|  |                               timestamp_to_bytes(interval.end) + b"\n") | ||||||
|  |  | ||||||
|  |         if count: | ||||||
|  |             return matched | ||||||
|  |         full_result = b"".join(result) | ||||||
|  |         return (full_result, restart) | ||||||
|  |  | ||||||
|  |     def stream_remove(self, path, start=None, end=None): | ||||||
|  |         """ | ||||||
|  |         Remove data from the specified time interval within a stream. | ||||||
|  |  | ||||||
|  |         Removes data in the interval [start, end), and intervals are | ||||||
|  |         truncated or split appropriately. | ||||||
|  |  | ||||||
|  |         Returns a (removed, restart) tuple. | ||||||
|  |  | ||||||
|  |         'removed' is the number of data points that were removed. | ||||||
|  |  | ||||||
|  |         'restart', if not None, means there were too many rows to | ||||||
|  |         remove in a single request.  This function should be called | ||||||
|  |         again with a start time of 'restart' to complete the removal. | ||||||
|  |         A restart is also requested once max_int_removals intervals | ||||||
|  |         have been processed in this call. | ||||||
|  |         """ | ||||||
|  |         stream_id = self._stream_id(path) | ||||||
|  |         table = self.data.getnode(path) | ||||||
|  |         intervals = self._get_intervals(stream_id) | ||||||
|  |         (start, end) = self._check_user_times(start, end) | ||||||
|  |         to_remove = Interval(start, end) | ||||||
|  |         removed = 0 | ||||||
|  |         # Budgets for this call: rows, and number of intervals | ||||||
|  |         remaining = self.max_removals | ||||||
|  |         int_remaining = self.max_int_removals | ||||||
|  |         restart = None | ||||||
|  |  | ||||||
|  |         # Can't remove intervals from within the iterator, so we need to | ||||||
|  |         # remember what's currently in the intersection now. | ||||||
|  |         all_candidates = list(intervals.intersection(to_remove, orig=True)) | ||||||
|  |  | ||||||
|  |         # Pending coalesced row range that has not been passed to | ||||||
|  |         # table.remove yet (None means nothing is pending). | ||||||
|  |         remove_start = None | ||||||
|  |         remove_end = None | ||||||
|  |  | ||||||
|  |         for (dbint, orig) in all_candidates: | ||||||
|  |             # Stop if we've hit the max number of interval removals | ||||||
|  |             if int_remaining <= 0: | ||||||
|  |                 restart = dbint.start | ||||||
|  |                 break | ||||||
|  |  | ||||||
|  |             # Find row start and end | ||||||
|  |             row_start = self._find_start(table, dbint) | ||||||
|  |             row_end = self._find_end(table, dbint) | ||||||
|  |  | ||||||
|  |             # Shorten it if we'll hit the maximum number of removals | ||||||
|  |             # NOTE(review): if 'remaining' is already 0 here (the previous | ||||||
|  |             # interval used up the budget exactly), row_end collapses to | ||||||
|  |             # row_start and the interval passed to _remove_interval may be | ||||||
|  |             # empty -- confirm that _remove_interval tolerates this. | ||||||
|  |             row_max = row_start + remaining | ||||||
|  |             if row_max < row_end: | ||||||
|  |                 row_end = row_max | ||||||
|  |                 dbint.end = table[row_max] | ||||||
|  |                 restart = dbint.end | ||||||
|  |  | ||||||
|  |             # Adjust the DBInterval to match the newly found ends | ||||||
|  |             dbint.db_start = dbint.start | ||||||
|  |             dbint.db_end = dbint.end | ||||||
|  |             dbint.db_startpos = row_start | ||||||
|  |             dbint.db_endpos = row_end | ||||||
|  |  | ||||||
|  |             # Remove interval from the database | ||||||
|  |             self._remove_interval(stream_id, orig, dbint) | ||||||
|  |  | ||||||
|  |             # Remove data from the underlying table storage, | ||||||
|  |             # coalescing adjacent removals to reduce the number of calls | ||||||
|  |             # to table.remove. | ||||||
|  |             if remove_end == row_start: | ||||||
|  |                 # Extend our coalesced region | ||||||
|  |                 remove_end = row_end | ||||||
|  |             else: | ||||||
|  |                 # Perform previous removal, then save this one | ||||||
|  |                 if remove_end is not None: | ||||||
|  |                     table.remove(remove_start, remove_end) | ||||||
|  |                 remove_start = row_start | ||||||
|  |                 remove_end = row_end | ||||||
|  |  | ||||||
|  |             # Count how many were removed | ||||||
|  |             removed += row_end - row_start | ||||||
|  |             remaining -= row_end - row_start | ||||||
|  |             int_remaining -= 1 | ||||||
|  |  | ||||||
|  |             # A row-limit truncation above ends this call | ||||||
|  |             if restart is not None: | ||||||
|  |                 break | ||||||
|  |  | ||||||
|  |         # Perform any final coalesced removal | ||||||
|  |         if remove_end is not None: | ||||||
|  |             table.remove(remove_start, remove_end) | ||||||
|  |  | ||||||
|  |         return (removed, restart) | ||||||
							
								
								
									
										25
									
								
								nilmdb/server/rbtree.pxd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								nilmdb/server/rbtree.pxd
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,25 @@ | |||||||
|  | # cython: language_level=2 | ||||||
|  |  | ||||||
|  | # Declaration of one tree node; the implementation is in rbtree.pyx. | ||||||
|  | cdef class RBNode: | ||||||
|  |     # Arbitrary Python value carried by the node | ||||||
|  |     cdef public object obj | ||||||
|  |     # Key of the node: the interval endpoints | ||||||
|  |     cdef public double start, end | ||||||
|  |     # Node color flag: nonzero means red, zero means black | ||||||
|  |     cdef public int red | ||||||
|  |     # Tree links | ||||||
|  |     cdef public RBNode left, right, parent | ||||||
|  |  | ||||||
|  | # Declaration of the tree; the implementation is in rbtree.pyx. | ||||||
|  | cdef class RBTree: | ||||||
|  |     # 'nil' is the shared sentinel leaf; 'root' is a dummy node whose | ||||||
|  |     # left child is the real root of the tree. | ||||||
|  |     cdef public RBNode nil, root | ||||||
|  |  | ||||||
|  |     # cpdef methods are callable from both Python and Cython; the | ||||||
|  |     # double-underscore cdef methods are internal helpers. | ||||||
|  |     cpdef getroot(RBTree self) | ||||||
|  |     cdef void __rotate_left(RBTree self, RBNode x) | ||||||
|  |     cdef void __rotate_right(RBTree self, RBNode y) | ||||||
|  |     cdef RBNode __successor(RBTree self, RBNode x) | ||||||
|  |     cpdef RBNode successor(RBTree self, RBNode x) | ||||||
|  |     cdef RBNode __predecessor(RBTree self, RBNode x) | ||||||
|  |     cpdef RBNode predecessor(RBTree self, RBNode x) | ||||||
|  |     cpdef insert(RBTree self, RBNode z) | ||||||
|  |     cdef void __insert_fixup(RBTree self, RBNode x) | ||||||
|  |     cpdef delete(RBTree self, RBNode z) | ||||||
|  |     cdef inline void __delete_fixup(RBTree self, RBNode x) | ||||||
|  |     cpdef RBNode find(RBTree self, double start, double end) | ||||||
|  |     cpdef RBNode find_left_end(RBTree self, double t) | ||||||
|  |     cpdef RBNode find_right_start(RBTree self, double t) | ||||||
							
								
								
									
										378
									
								
								nilmdb/server/rbtree.pyx
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										378
									
								
								nilmdb/server/rbtree.pyx
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,378 @@ | |||||||
|  | # cython: profile=False | ||||||
|  | # cython: cdivision=True | ||||||
|  | # cython: language_level=2 | ||||||
|  |  | ||||||
|  | """ | ||||||
|  | Jim Paris <jim@jtan.com> | ||||||
|  |  | ||||||
|  | Red-black tree, where keys are stored as start/end timestamps. | ||||||
|  | This is a basic interval tree that holds half-open intervals: | ||||||
|  |   [start, end) | ||||||
|  | Intervals must not overlap.  Fixing that would involve making this | ||||||
|  | into an augmented interval tree as described in CLRS 14.3. | ||||||
|  |  | ||||||
|  | Code that assumes non-overlapping intervals is marked with the | ||||||
|  | string 'non-overlapping'. | ||||||
|  | """ | ||||||
|  |  | ||||||
|  | import sys | ||||||
|  | cimport rbtree | ||||||
|  |  | ||||||
|  | cdef class RBNode: | ||||||
|  |     """One node of the Red/Black tree, containing a key (start, end) | ||||||
|  |     and value (obj)""" | ||||||
|  |     def __init__(self, double start, double end, object obj = None): | ||||||
|  |         """Create a detached black node.  'left' and 'right' are set | ||||||
|  |         to None here; RBTree.insert replaces them with the tree's nil | ||||||
|  |         sentinel.  'parent' is not assigned here.""" | ||||||
|  |         self.obj = obj | ||||||
|  |         self.start = start | ||||||
|  |         self.end = end | ||||||
|  |         self.red = False | ||||||
|  |         self.left = None | ||||||
|  |         self.right = None | ||||||
|  |  | ||||||
|  |     def __str__(self): | ||||||
|  |         """Return a short description of the node: "[node nil]" for | ||||||
|  |         the sentinel, otherwise the value, the key and the color | ||||||
|  |         (R or B).""" | ||||||
|  |         if self.red: | ||||||
|  |             color = "R" | ||||||
|  |         else: | ||||||
|  |             color = "B" | ||||||
|  |         # RBTree.__init__ creates its nil sentinel with | ||||||
|  |         # start == sys.float_info.min, so that value is used to | ||||||
|  |         # recognize it here. | ||||||
|  |         if self.start == sys.float_info.min: | ||||||
|  |             return "[node nil]" | ||||||
|  |         return ("[node (" | ||||||
|  |                 + str(self.obj) + ") " | ||||||
|  |                 + str(self.start) + " -> " + str(self.end) + " " | ||||||
|  |                 + color + "]") | ||||||
|  |  | ||||||
|  | cdef class RBTree: | ||||||
|  |     """Red/Black tree""" | ||||||
|  |  | ||||||
|  |     # Init | ||||||
|  |     def __init__(self): | ||||||
|  |         """Create an empty tree consisting of the nil sentinel and a | ||||||
|  |         dummy root node.""" | ||||||
|  |         # The sentinel points at itself in every direction, so code | ||||||
|  |         # can follow links from it without checking for None. | ||||||
|  |         self.nil = RBNode(start = sys.float_info.min, | ||||||
|  |                           end = sys.float_info.min) | ||||||
|  |         self.nil.left = self.nil | ||||||
|  |         self.nil.right = self.nil | ||||||
|  |         self.nil.parent = self.nil | ||||||
|  |  | ||||||
|  |         # Dummy root; the real root of the tree is self.root.left | ||||||
|  |         # (see getroot). | ||||||
|  |         self.root = RBNode(start = sys.float_info.max, | ||||||
|  |                            end = sys.float_info.max) | ||||||
|  |         self.root.left = self.nil | ||||||
|  |         self.root.right = self.nil | ||||||
|  |         self.root.parent = self.nil | ||||||
|  |  | ||||||
|  |     # We have a dummy root node to simplify operations, so from an | ||||||
|  |     # external point of view, its left child is the real root. | ||||||
|  |     cpdef getroot(self): | ||||||
|  |         """Return the real root of the tree (the dummy root's left | ||||||
|  |         child).  For an empty tree this is the nil sentinel.""" | ||||||
|  |         return self.root.left | ||||||
|  |  | ||||||
|  |     # Rotations and basic operations | ||||||
|  |     cdef void __rotate_left(self, RBNode x): | ||||||
|  |         """Rotate left: | ||||||
|  |         #   x           y | ||||||
|  |         #  / \   -->   / \ | ||||||
|  |         # z   y       x   w | ||||||
|  |         #    / \     / \ | ||||||
|  |         #   v   w   z   v | ||||||
|  |         """ | ||||||
|  |         cdef RBNode y = x.right | ||||||
|  |         # y's left subtree (v) becomes x's right subtree | ||||||
|  |         x.right = y.left | ||||||
|  |         if y.left is not self.nil: | ||||||
|  |             y.left.parent = x | ||||||
|  |         # y takes x's place under x's former parent | ||||||
|  |         y.parent = x.parent | ||||||
|  |         if x is x.parent.left: | ||||||
|  |             x.parent.left = y | ||||||
|  |         else: | ||||||
|  |             x.parent.right = y | ||||||
|  |         # x becomes y's left child | ||||||
|  |         y.left = x | ||||||
|  |         x.parent = y | ||||||
|  |  | ||||||
|  |     cdef void __rotate_right(self, RBNode y): | ||||||
|  |         """Rotate right: | ||||||
|  |         #     y           x | ||||||
|  |         #    / \   -->   / \ | ||||||
|  |         #   x   w       z   y | ||||||
|  |         #  / \             / \ | ||||||
|  |         # z   v           v   w | ||||||
|  |         """ | ||||||
|  |         cdef RBNode x = y.left | ||||||
|  |         # x's right subtree (v) becomes y's left subtree | ||||||
|  |         y.left = x.right | ||||||
|  |         if x.right is not self.nil: | ||||||
|  |             x.right.parent = y | ||||||
|  |         # x takes y's place under y's former parent | ||||||
|  |         x.parent = y.parent | ||||||
|  |         if y is y.parent.left: | ||||||
|  |             y.parent.left = x | ||||||
|  |         else: | ||||||
|  |             y.parent.right = x | ||||||
|  |         # y becomes x's right child | ||||||
|  |         x.right = y | ||||||
|  |         y.parent = x | ||||||
|  |  | ||||||
|  |     cdef RBNode __successor(self, RBNode x): | ||||||
|  |         """Returns the successor of RBNode x, or the nil sentinel if | ||||||
|  |         x has no successor""" | ||||||
|  |         cdef RBNode y = x.right | ||||||
|  |         if y is not self.nil: | ||||||
|  |             # Leftmost node of the right subtree | ||||||
|  |             while y.left is not self.nil: | ||||||
|  |                 y = y.left | ||||||
|  |         else: | ||||||
|  |             # Climb until we leave a left subtree; that ancestor is next | ||||||
|  |             y = x.parent | ||||||
|  |             while x is y.right: | ||||||
|  |                 x = y | ||||||
|  |                 y = y.parent | ||||||
|  |             # Reaching the dummy root means x was the last node | ||||||
|  |             if y is self.root: | ||||||
|  |                 return self.nil | ||||||
|  |         return y | ||||||
|  |     cpdef RBNode successor(self, RBNode x): | ||||||
|  |         """Returns the successor of RBNode x, or None""" | ||||||
|  |         cdef RBNode y = self.__successor(x) | ||||||
|  |         # Hide the internal nil sentinel from callers | ||||||
|  |         return y if y is not self.nil else None | ||||||
|  |  | ||||||
|  |     cdef RBNode __predecessor(self, RBNode x): | ||||||
|  |         """Returns the predecessor of RBNode x, or the nil sentinel | ||||||
|  |         if the climb reaches the dummy root""" | ||||||
|  |         cdef RBNode y = x.left | ||||||
|  |         if y is not self.nil: | ||||||
|  |             # Rightmost node of the left subtree | ||||||
|  |             while y.right is not self.nil: | ||||||
|  |                 y = y.right | ||||||
|  |         else: | ||||||
|  |             # Climb until we leave a right subtree | ||||||
|  |             y = x.parent | ||||||
|  |             while x is y.left: | ||||||
|  |                 if y is self.root: | ||||||
|  |                     # Came up the left edge all the way: no predecessor | ||||||
|  |                     y = self.nil | ||||||
|  |                     break | ||||||
|  |                 x = y | ||||||
|  |                 y = y.parent | ||||||
|  |         return y | ||||||
|  |     cpdef RBNode predecessor(self, RBNode x): | ||||||
|  |         """Returns the predecessor of RBNode x, or None""" | ||||||
|  |         cdef RBNode y = self.__predecessor(x) | ||||||
|  |         # Hide the internal nil sentinel from callers | ||||||
|  |         return y if y is not self.nil else None | ||||||
|  |  | ||||||
|  |     # Insertion | ||||||
|  |     cpdef insert(self, RBNode z): | ||||||
|  |         """Insert RBNode z into RBTree and rebalance as necessary. | ||||||
|  |  | ||||||
|  |         Nodes are ordered by start, then by end.  A node whose key | ||||||
|  |         equals an existing key is placed in that node's right | ||||||
|  |         subtree.""" | ||||||
|  |         z.left = self.nil | ||||||
|  |         z.right = self.nil | ||||||
|  |         # Descend from the real root; y trails x and ends up as the | ||||||
|  |         # parent of the new node. | ||||||
|  |         cdef RBNode y = self.root | ||||||
|  |         cdef RBNode x = self.root.left | ||||||
|  |         while x is not self.nil: | ||||||
|  |             y = x | ||||||
|  |             if (x.start > z.start or (x.start == z.start and x.end > z.end)): | ||||||
|  |                 x = x.left | ||||||
|  |             else: | ||||||
|  |                 x = x.right | ||||||
|  |         z.parent = y | ||||||
|  |         # In an empty tree y is still the dummy root, whose left child | ||||||
|  |         # is the real root. | ||||||
|  |         if (y is self.root or | ||||||
|  |             (y.start > z.start or (y.start == z.start and y.end > z.end))): | ||||||
|  |             y.left = z | ||||||
|  |         else: | ||||||
|  |             y.right = z | ||||||
|  |         # relabel/rebalance | ||||||
|  |         self.__insert_fixup(z) | ||||||
|  |  | ||||||
|  |     cdef void __insert_fixup(self, RBNode x): | ||||||
|  |         """Rebalance/fix RBTree after a simple insertion of RBNode x. | ||||||
|  |  | ||||||
|  |         x is colored red, then recoloring and rotations are applied | ||||||
|  |         while x's parent is also red.  The real root is colored black | ||||||
|  |         at the end.""" | ||||||
|  |         # NOTE(review): 'y' (the sibling of x's parent) is not declared | ||||||
|  |         # with cdef here, unlike the locals in other methods -- confirm | ||||||
|  |         # whether that is intentional. | ||||||
|  |         x.red = True | ||||||
|  |         while x.parent.red: | ||||||
|  |             if x.parent is x.parent.parent.left: | ||||||
|  |                 y = x.parent.parent.right | ||||||
|  |                 if y.red: | ||||||
|  |                     # Parent and its sibling are both red: recolor and | ||||||
|  |                     # continue from the grandparent | ||||||
|  |                     x.parent.red = False | ||||||
|  |                     y.red = False | ||||||
|  |                     x.parent.parent.red = True | ||||||
|  |                     x = x.parent.parent | ||||||
|  |                 else: | ||||||
|  |                     if x is x.parent.right: | ||||||
|  |                         # Turn the inner-child case into the outer one | ||||||
|  |                         x = x.parent | ||||||
|  |                         self.__rotate_left(x) | ||||||
|  |                     x.parent.red = False | ||||||
|  |                     x.parent.parent.red = True | ||||||
|  |                     self.__rotate_right(x.parent.parent) | ||||||
|  |             else: # same as above, left/right switched | ||||||
|  |                 y = x.parent.parent.left | ||||||
|  |                 if y.red: | ||||||
|  |                     x.parent.red = False | ||||||
|  |                     y.red = False | ||||||
|  |                     x.parent.parent.red = True | ||||||
|  |                     x = x.parent.parent | ||||||
|  |                 else: | ||||||
|  |                     if x is x.parent.left: | ||||||
|  |                         x = x.parent | ||||||
|  |                         self.__rotate_right(x) | ||||||
|  |                     x.parent.red = False | ||||||
|  |                     x.parent.parent.red = True | ||||||
|  |                     self.__rotate_left(x.parent.parent) | ||||||
|  |         # The real root is always black | ||||||
|  |         self.root.left.red = False | ||||||
|  |  | ||||||
|  |     # Deletion | ||||||
|  |     cpdef delete(self, RBNode z): | ||||||
|  |         """Remove RBNode z from the tree and rebalance as necessary. | ||||||
|  |  | ||||||
|  |         z must be a node that is in the tree.  Raises AttributeError | ||||||
|  |         if z.left or z.right is None, which is the state of a node | ||||||
|  |         that was constructed but never inserted.""" | ||||||
|  |         if z.left is None or z.right is None: | ||||||
|  |             raise AttributeError("you can only delete a node object " | ||||||
|  |                                  + "from the tree; use find() to get one") | ||||||
|  |         cdef RBNode x, y | ||||||
|  |         # y is the node that is physically unlinked: z itself if it has | ||||||
|  |         # at most one real child, otherwise z's successor. | ||||||
|  |         if z.left is self.nil or z.right is self.nil: | ||||||
|  |             y = z | ||||||
|  |         else: | ||||||
|  |             y = self.__successor(z) | ||||||
|  |         # x is y's only child (possibly the nil sentinel) | ||||||
|  |         if y.left is self.nil: | ||||||
|  |             x = y.right | ||||||
|  |         else: | ||||||
|  |             x = y.left | ||||||
|  |         # Link x into y's old position.  Note that this may write to | ||||||
|  |         # the nil sentinel's parent field. | ||||||
|  |         x.parent = y.parent | ||||||
|  |         if x.parent is self.root: | ||||||
|  |             self.root.left = x | ||||||
|  |         else: | ||||||
|  |             if y is y.parent.left: | ||||||
|  |                 y.parent.left = x | ||||||
|  |             else: | ||||||
|  |                 y.parent.right = x | ||||||
|  |         if y is not z: | ||||||
|  |             # y is the node to splice out, x is its child | ||||||
|  |             # Move y into z's position, taking over z's links and color | ||||||
|  |             y.left = z.left | ||||||
|  |             y.right = z.right | ||||||
|  |             y.parent = z.parent | ||||||
|  |             z.left.parent = y | ||||||
|  |             z.right.parent = y | ||||||
|  |             if z is z.parent.left: | ||||||
|  |                 z.parent.left = y | ||||||
|  |             else: | ||||||
|  |                 z.parent.right = y | ||||||
|  |             # A fixup is only needed if the unlinked node was black | ||||||
|  |             if not y.red: | ||||||
|  |                 y.red = z.red | ||||||
|  |                 self.__delete_fixup(x) | ||||||
|  |             else: | ||||||
|  |                 y.red = z.red | ||||||
|  |         else: | ||||||
|  |             if not y.red: | ||||||
|  |                 self.__delete_fixup(x) | ||||||
|  |  | ||||||
|  |     cdef void __delete_fixup(self, RBNode x): | ||||||
|  |         """Rebalance/fix RBTree after a deletion.  RBNode x is the | ||||||
|  |         child of the spliced out node. | ||||||
|  |  | ||||||
|  |         Loops while x is black and is not the real root, recoloring | ||||||
|  |         and rotating around x's sibling w; x is colored black at the | ||||||
|  |         end.""" | ||||||
|  |         # NOTE(review): 'w' (x's sibling) is not declared with cdef | ||||||
|  |         # here -- confirm whether that is intentional. | ||||||
|  |         # rootLeft is captured once, before any rotation in the loop. | ||||||
|  |         cdef RBNode rootLeft = self.root.left | ||||||
|  |         while not x.red and x is not rootLeft: | ||||||
|  |             if x is x.parent.left: | ||||||
|  |                 w = x.parent.right | ||||||
|  |                 if w.red: | ||||||
|  |                     # Red sibling: rotate so that x gets a black sibling | ||||||
|  |                     w.red = False | ||||||
|  |                     x.parent.red = True | ||||||
|  |                     self.__rotate_left(x.parent) | ||||||
|  |                     w = x.parent.right | ||||||
|  |                 if not w.right.red and not w.left.red: | ||||||
|  |                     # Sibling has two black children: recolor, move up | ||||||
|  |                     w.red = True | ||||||
|  |                     x = x.parent | ||||||
|  |                 else: | ||||||
|  |                     if not w.right.red: | ||||||
|  |                         # Only the near child is red: rotate it outward | ||||||
|  |                         w.left.red = False | ||||||
|  |                         w.red = True | ||||||
|  |                         self.__rotate_right(w) | ||||||
|  |                         w = x.parent.right | ||||||
|  |                     w.red = x.parent.red | ||||||
|  |                     x.parent.red = False | ||||||
|  |                     w.right.red = False | ||||||
|  |                     self.__rotate_left(x.parent) | ||||||
|  |                     x = rootLeft # exit loop | ||||||
|  |             else: # same as above, left/right switched | ||||||
|  |                 w = x.parent.left | ||||||
|  |                 if w.red: | ||||||
|  |                     w.red = False | ||||||
|  |                     x.parent.red = True | ||||||
|  |                     self.__rotate_right(x.parent) | ||||||
|  |                     w = x.parent.left | ||||||
|  |                 if not w.left.red and not w.right.red: | ||||||
|  |                     w.red = True | ||||||
|  |                     x = x.parent | ||||||
|  |                 else: | ||||||
|  |                     if not w.left.red: | ||||||
|  |                         w.right.red = False | ||||||
|  |                         w.red = True | ||||||
|  |                         self.__rotate_left(w) | ||||||
|  |                         w = x.parent.left | ||||||
|  |                     w.red = x.parent.red | ||||||
|  |                     x.parent.red = False | ||||||
|  |                     w.left.red = False | ||||||
|  |                     self.__rotate_right(x.parent) | ||||||
|  |                     x = rootLeft # exit loop | ||||||
|  |         x.red = False | ||||||
|  |  | ||||||
|  |     # Walking, searching | ||||||
|  |     def __iter__(self): | ||||||
|  |         """Iterate over all nodes of the tree in order (see inorder).""" | ||||||
|  |         return self.inorder() | ||||||
|  |  | ||||||
|  |     def inorder(self, RBNode x = None): | ||||||
|  |         """Generator that performs an inorder walk starting at the | ||||||
|  |         leftmost node of the subtree rooted at RBNode x (default: the | ||||||
|  |         real root). | ||||||
|  |  | ||||||
|  |         The walk proceeds by following successors until the end of | ||||||
|  |         the tree, so when x is not the root it also yields the nodes | ||||||
|  |         that come after x's subtree.  Nothing is yielded for an empty | ||||||
|  |         tree.""" | ||||||
|  |         if x is None: | ||||||
|  |             x = self.getroot() | ||||||
|  |         # Descend to the smallest node under x | ||||||
|  |         while x.left is not self.nil: | ||||||
|  |             x = x.left | ||||||
|  |         while x is not self.nil: | ||||||
|  |             yield x | ||||||
|  |             x = self.__successor(x) | ||||||
|  |  | ||||||
|  |     cpdef RBNode find(self, double start, double end): | ||||||
|  |         """Return the node with exactly the given start and end, or | ||||||
|  |         None if there is no such node.""" | ||||||
|  |         cdef RBNode x = self.getroot() | ||||||
|  |         # Standard search using the same (start, end) ordering as insert | ||||||
|  |         while x is not self.nil: | ||||||
|  |             if start < x.start: | ||||||
|  |                 x = x.left | ||||||
|  |             elif start == x.start: | ||||||
|  |                 if end == x.end: | ||||||
|  |                     break # found it | ||||||
|  |                 elif end < x.end: | ||||||
|  |                     x = x.left | ||||||
|  |                 else: | ||||||
|  |                     x = x.right | ||||||
|  |             else: | ||||||
|  |                 x = x.right | ||||||
|  |         return x if x is not self.nil else None | ||||||
|  |  | ||||||
|  |     cpdef RBNode find_left_end(self, double t): | ||||||
|  |         """Find the leftmost node with end >= t.  With non-overlapping | ||||||
|  |         intervals, this is the first node that might overlap time t. | ||||||
|  |         Returns None if there is no such node. | ||||||
|  |  | ||||||
|  |         Note that this relies on non-overlapping intervals, since | ||||||
|  |         it assumes that we can use the endpoints to traverse the | ||||||
|  |         tree even though it was created using the start points.""" | ||||||
|  |         cdef RBNode x = self.getroot() | ||||||
|  |         while x is not self.nil: | ||||||
|  |             if t < x.end: | ||||||
|  |                 # x qualifies; keep it if there is nothing further left | ||||||
|  |                 if x.left is self.nil: | ||||||
|  |                     break | ||||||
|  |                 x = x.left | ||||||
|  |             elif t == x.end: | ||||||
|  |                 break | ||||||
|  |             else: | ||||||
|  |                 # x ends before t; with no right child, the answer is | ||||||
|  |                 # x's successor (possibly nil) | ||||||
|  |                 if x.right is self.nil: | ||||||
|  |                     x = self.__successor(x) | ||||||
|  |                     break | ||||||
|  |                 x = x.right | ||||||
|  |         return x if x is not self.nil else None | ||||||
|  |  | ||||||
|  |     cpdef RBNode find_right_start(self, double t): | ||||||
|  |         """Find the rightmost node with start <= t.  With non-overlapping | ||||||
|  |         intervals, this is the last node that might overlap time t. | ||||||
|  |         Returns None if there is no such node.""" | ||||||
|  |         cdef RBNode x = self.getroot() | ||||||
|  |         while x is not self.nil: | ||||||
|  |             if t < x.start: | ||||||
|  |                 # x starts after t; with no left child, the answer is | ||||||
|  |                 # x's predecessor (possibly nil) | ||||||
|  |                 if x.left is self.nil: | ||||||
|  |                     x = self.__predecessor(x) | ||||||
|  |                     break | ||||||
|  |                 x = x.left | ||||||
|  |             elif t == x.start: | ||||||
|  |                 break | ||||||
|  |             else: | ||||||
|  |                 # x qualifies; keep it if there is nothing further right | ||||||
|  |                 if x.right is self.nil: | ||||||
|  |                     break | ||||||
|  |                 x = x.right | ||||||
|  |         return x if x is not self.nil else None | ||||||
|  |  | ||||||
|  |     # Intersections | ||||||
|  |     def intersect(self, double start, double end): | ||||||
|  |         """Generator that returns nodes that overlap the given | ||||||
|  |         (start,end) range.  Assumes non-overlapping intervals. | ||||||
|  |  | ||||||
|  |         Both the range and the stored intervals are treated as | ||||||
|  |         half-open: a node is not yielded if it ends exactly at | ||||||
|  |         'start' or starts exactly at 'end'.""" | ||||||
|  |         # Start with the leftmost node whose end is at or after start | ||||||
|  |         cdef RBNode n = self.find_left_end(start) | ||||||
|  |         while n is not None: | ||||||
|  |             if n.start >= end: | ||||||
|  |                 # this node starts after the requested end; we're done | ||||||
|  |                 break | ||||||
|  |             if start < n.end: | ||||||
|  |                 # this node overlaps our requested area | ||||||
|  |                 yield n | ||||||
|  |             n = self.successor(n) | ||||||
							
								
								
									
										1
									
								
								nilmdb/server/rbtree.pyxdep
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								nilmdb/server/rbtree.pyxdep
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | |||||||
|  | rbtree.pxd | ||||||
							
								
								
									
										806
									
								
								nilmdb/server/rocket.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										806
									
								
								nilmdb/server/rocket.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,806 @@ | |||||||
|  | #include <Python.h> | ||||||
|  | #include <structmember.h> | ||||||
|  | #include <endian.h> | ||||||
|  |  | ||||||
|  | #include <ctype.h> | ||||||
|  | #include <stdint.h> | ||||||
|  |  | ||||||
|  | #define __STDC_FORMAT_MACROS | ||||||
|  | #include <inttypes.h> | ||||||
|  |  | ||||||
|  | /* Values missing from stdint.h: the minimum of every unsigned | ||||||
|  |    type is zero. */ | ||||||
|  | #define UINT8_MIN 0 | ||||||
|  | #define UINT16_MIN 0 | ||||||
|  | #define UINT32_MIN 0 | ||||||
|  | #define UINT64_MIN 0 | ||||||
|  |  | ||||||
|  | /* Marker values (if min == max, skip range check).  These are not | ||||||
|  |    real floating-point limits. */ | ||||||
|  | #define FLOAT32_MIN 0 | ||||||
|  | #define FLOAT32_MAX 0 | ||||||
|  | #define FLOAT64_MIN 0 | ||||||
|  | #define FLOAT64_MAX 0 | ||||||
|  |  | ||||||
|  | /* Timestamps are signed 64-bit integers. */ | ||||||
|  | typedef int64_t timestamp_t; | ||||||
|  |  | ||||||
|  | /* Somewhat arbitrary, just so we can use fixed sizes for strings | ||||||
|  |    etc. */ | ||||||
|  | static const int MAX_LAYOUT_COUNT = 1024; | ||||||
|  |  | ||||||
|  | /* Error object and constants.  ParseError is the exception object | ||||||
|  |    raised by raise_str() and raise_int(); the enum values are the | ||||||
|  |    'code' element of the tuple those helpers build. */ | ||||||
|  | static PyObject *ParseError; | ||||||
|  | typedef enum { | ||||||
|  | 	ERR_OTHER, | ||||||
|  | 	ERR_NON_MONOTONIC, | ||||||
|  | 	ERR_OUT_OF_INTERVAL, | ||||||
|  | } parseerror_code_t; | ||||||
|  | static void add_parseerror_codes(PyObject *module) | ||||||
|  | { | ||||||
|  | 	PyModule_AddIntMacro(module, ERR_OTHER); | ||||||
|  | 	PyModule_AddIntMacro(module, ERR_NON_MONOTONIC); | ||||||
|  | 	PyModule_AddIntMacro(module, ERR_OUT_OF_INTERVAL); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* Helpers to raise ParseErrors.  Use "return raise_str(...)" etc. | ||||||
|  |  | ||||||
|  |    raise_str sets ParseError with the argument tuple | ||||||
|  |    (line, col, code, string) and always returns NULL.  If the tuple | ||||||
|  |    cannot be built, the exception left by Py_BuildValue stays set. */ | ||||||
|  | static PyObject *raise_str(int line, int col, int code, const char *string) | ||||||
|  | { | ||||||
|  | 	PyObject *payload = Py_BuildValue("(iiis)", line, col, code, string); | ||||||
|  |  | ||||||
|  | 	if (payload == NULL) | ||||||
|  | 		return NULL; | ||||||
|  | 	PyErr_SetObject(ParseError, payload); | ||||||
|  | 	Py_DECREF(payload); | ||||||
|  | 	return NULL; | ||||||
|  | } | ||||||
|  | /* Same as raise_str, but the fourth tuple element is a 64-bit | ||||||
|  |    integer (e.g. the offending timestamp).  Always returns NULL. */ | ||||||
|  | static PyObject *raise_int(int line, int col, int code, int64_t num) | ||||||
|  | { | ||||||
|  | 	PyObject *payload = Py_BuildValue("(iiiL)", line, col, code, | ||||||
|  | 					  (long long)num); | ||||||
|  |  | ||||||
|  | 	if (payload == NULL) | ||||||
|  | 		return NULL; | ||||||
|  | 	PyErr_SetObject(ParseError, payload); | ||||||
|  | 	Py_DECREF(payload); | ||||||
|  | 	return NULL; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /**** | ||||||
|  |  * Layout and type helpers | ||||||
|  |  */ | ||||||
|  | /* Fixed-width scratch unions.  The parse/format macros store a value | ||||||
|  |    through its natural member (.i, .f, .d) and byte-swap or write it | ||||||
|  |    through the same-sized unsigned member (.u). */ | ||||||
|  | typedef union { | ||||||
|  | 	int8_t i; | ||||||
|  | 	uint8_t u; | ||||||
|  | } union8_t; | ||||||
|  | typedef union { | ||||||
|  | 	int16_t i; | ||||||
|  | 	uint16_t u; | ||||||
|  | } union16_t; | ||||||
|  | typedef union { | ||||||
|  | 	int32_t i; | ||||||
|  | 	uint32_t u; | ||||||
|  | 	float f; | ||||||
|  | } union32_t; | ||||||
|  | typedef union { | ||||||
|  | 	int64_t i; | ||||||
|  | 	uint64_t u; | ||||||
|  | 	double d; | ||||||
|  | } union64_t; | ||||||
|  |  | ||||||
|  | /* Element type of a layout.  LAYOUT_TYPE_NONE is the value a Rocket | ||||||
|  |    holds before __init__ has picked a real type; the parse and format | ||||||
|  |    switches reject it as "unknown type". */ | ||||||
|  | typedef enum { | ||||||
|  | 	LAYOUT_TYPE_NONE, | ||||||
|  | 	LAYOUT_TYPE_INT8, | ||||||
|  | 	LAYOUT_TYPE_UINT8, | ||||||
|  | 	LAYOUT_TYPE_INT16, | ||||||
|  | 	LAYOUT_TYPE_UINT16, | ||||||
|  | 	LAYOUT_TYPE_INT32, | ||||||
|  | 	LAYOUT_TYPE_UINT32, | ||||||
|  | 	LAYOUT_TYPE_INT64, | ||||||
|  | 	LAYOUT_TYPE_UINT64, | ||||||
|  | 	LAYOUT_TYPE_FLOAT32, | ||||||
|  | 	LAYOUT_TYPE_FLOAT64, | ||||||
|  | } layout_type_t; | ||||||
|  |  | ||||||
|  | /* Maps the type part of a layout string (the text before the '_') | ||||||
|  |    to its enum value and its size in bytes.  The table ends with an | ||||||
|  |    entry whose string is NULL; Rocket_init scans until it reaches it. */ | ||||||
|  | struct { | ||||||
|  | 	char *string; | ||||||
|  | 	layout_type_t layout; | ||||||
|  | 	int size; | ||||||
|  | } type_lookup[] = { | ||||||
|  | 	{ "int8",    LAYOUT_TYPE_INT8,    1 }, | ||||||
|  | 	{ "uint8",   LAYOUT_TYPE_UINT8,   1 }, | ||||||
|  | 	{ "int16",   LAYOUT_TYPE_INT16,   2 }, | ||||||
|  | 	{ "uint16",  LAYOUT_TYPE_UINT16,  2 }, | ||||||
|  | 	{ "int32",   LAYOUT_TYPE_INT32,   4 }, | ||||||
|  | 	{ "uint32",  LAYOUT_TYPE_UINT32,  4 }, | ||||||
|  | 	{ "int64",   LAYOUT_TYPE_INT64,   8 }, | ||||||
|  | 	{ "uint64",  LAYOUT_TYPE_UINT64,  8 }, | ||||||
|  | 	{ "float32", LAYOUT_TYPE_FLOAT32, 4 }, | ||||||
|  | 	{ "float64", LAYOUT_TYPE_FLOAT64, 8 }, | ||||||
|  | 	{ NULL } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | /**** | ||||||
|  |  * Object definition, init, etc | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /* Rocket object | ||||||
|  |  | ||||||
|  |    layout_type / layout_count: element type and number of values per | ||||||
|  |        row, taken from the layout string given to __init__. | ||||||
|  |    binary_size: bytes per row on disk, computed in Rocket_init as | ||||||
|  |        8 (timestamp) + element size * layout_count. | ||||||
|  |    file: stdio handle of the data file, or NULL when no file is open. | ||||||
|  |    file_size: cached size for the file_size property; a negative | ||||||
|  |        value means "not computed yet". */ | ||||||
|  | typedef struct { | ||||||
|  | 	PyObject_HEAD | ||||||
|  | 	layout_type_t layout_type; | ||||||
|  | 	int layout_count; | ||||||
|  | 	int binary_size; | ||||||
|  | 	FILE *file; | ||||||
|  | 	int file_size; | ||||||
|  | } Rocket; | ||||||
|  |  | ||||||
|  | /* Dealloc / new */ | ||||||
|  |  | ||||||
|  | /* Destructor.  A file that is still open at this point is reported | ||||||
|  |    on stderr and closed before the object's memory is released. */ | ||||||
|  | static void Rocket_dealloc(Rocket *self) | ||||||
|  | { | ||||||
|  | 	FILE *leftover = self->file; | ||||||
|  |  | ||||||
|  | 	if (leftover != NULL) { | ||||||
|  | 		fprintf(stderr, "rocket: file wasn't closed\n"); | ||||||
|  | 		fclose(leftover); | ||||||
|  | 		self->file = NULL; | ||||||
|  | 	} | ||||||
|  | 	Py_TYPE(self)->tp_free((PyObject *)self); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* Allocate a Rocket (the tp_new function named in RocketType) and | ||||||
|  |    give every field its "nothing configured yet" value.  Returns NULL | ||||||
|  |    if the allocation fails. */ | ||||||
|  | static PyObject *Rocket_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | ||||||
|  | { | ||||||
|  | 	Rocket *obj = (Rocket *)type->tp_alloc(type, 0); | ||||||
|  |  | ||||||
|  | 	if (obj != NULL) { | ||||||
|  | 		obj->layout_type = LAYOUT_TYPE_NONE; | ||||||
|  | 		obj->layout_count = 0; | ||||||
|  | 		obj->binary_size = 0; | ||||||
|  | 		obj->file = NULL; | ||||||
|  | 		/* negative means the size has not been measured */ | ||||||
|  | 		obj->file_size = -1; | ||||||
|  | 	} | ||||||
|  | 	return (PyObject *)obj; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* .__init__(layout, file) | ||||||
|  |  | ||||||
|  |    layout: "<type>_<count>", e.g. "float32_8".  <type> must be exactly | ||||||
|  |        one of the names in type_lookup and <count> a decimal integer | ||||||
|  |        with 1 <= count < MAX_LAYOUT_COUNT. | ||||||
|  |    file: path of the data file, opened in "a+b" mode (created if | ||||||
|  |        missing), or None for a Rocket with no file. | ||||||
|  |  | ||||||
|  |    Raises ValueError for a bad layout or a path containing NUL, and | ||||||
|  |    OSError if the file cannot be opened.  Returns 0 on success and -1 | ||||||
|  |    with an exception set on failure. */ | ||||||
|  | static int Rocket_init(Rocket *self, PyObject *args, PyObject *kwds) | ||||||
|  | { | ||||||
|  | 	const char *layout, *path; | ||||||
|  | 	int pathlen; | ||||||
|  | 	static char *kwlist[] = { "layout", "file", NULL }; | ||||||
|  | 	const char *under; | ||||||
|  | 	char *tmp; | ||||||
|  | 	size_t typelen; | ||||||
|  | 	long count; | ||||||
|  | 	FILE *file = NULL; | ||||||
|  | 	int i; | ||||||
|  |  | ||||||
|  | 	/* NOTE(review): '#' formats expect PY_SSIZE_T_CLEAN to be defined | ||||||
|  | 	   before Python.h (with a Py_ssize_t length) on newer CPython | ||||||
|  | 	   versions; this file does not define it.  That needs a | ||||||
|  | 	   file-wide change, so it is left as-is here -- confirm against | ||||||
|  | 	   the targeted Python version. */ | ||||||
|  | 	if (!PyArg_ParseTupleAndKeywords(args, kwds, "sz#", kwlist, | ||||||
|  | 					 &layout, &path, &pathlen)) | ||||||
|  | 		return -1; | ||||||
|  |  | ||||||
|  | 	/* Validate the layout before touching the filesystem: "a+b" | ||||||
|  | 	   creates the file, so a rejected layout must not leave a new | ||||||
|  | 	   (and still open) file behind. */ | ||||||
|  | 	under = strchr(layout, '_'); | ||||||
|  | 	if (!under) { | ||||||
|  | 		PyErr_SetString(PyExc_ValueError, "no such layout: " | ||||||
|  | 				"badly formatted string"); | ||||||
|  | 		return -1; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* Parse the count as a signed long so that negative or huge | ||||||
|  | 	   values cannot wrap into the valid range when narrowed to int. */ | ||||||
|  | 	count = strtol(under + 1, &tmp, 10); | ||||||
|  | 	if (tmp == under + 1 || *tmp != '\0' || count < 1) { | ||||||
|  | 		PyErr_SetString(PyExc_ValueError, "no such layout: " | ||||||
|  | 				"bad count"); | ||||||
|  | 		return -1; | ||||||
|  | 	} | ||||||
|  | 	if (count >= MAX_LAYOUT_COUNT) { | ||||||
|  | 		PyErr_SetString(PyExc_ValueError, "no such layout: " | ||||||
|  | 				"count too high"); | ||||||
|  | 		return -1; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* The type name must match a table entry exactly.  Comparing | ||||||
|  | 	   only the first (under - layout) characters would accept any | ||||||
|  | 	   prefix, e.g. "int_3" or even "_3", as a valid type. */ | ||||||
|  | 	typelen = (size_t)(under - layout); | ||||||
|  | 	for (i = 0; type_lookup[i].string; i++) | ||||||
|  | 		if (strlen(type_lookup[i].string) == typelen && | ||||||
|  | 		    memcmp(layout, type_lookup[i].string, typelen) == 0) | ||||||
|  | 			break; | ||||||
|  | 	if (!type_lookup[i].string) { | ||||||
|  | 		PyErr_SetString(PyExc_ValueError, "no such layout: " | ||||||
|  | 				"bad data type"); | ||||||
|  | 		return -1; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if (path) { | ||||||
|  | 		if (strlen(path) != (size_t)pathlen) { | ||||||
|  | 			PyErr_SetString(PyExc_ValueError, "path must not " | ||||||
|  | 					"contain NUL characters"); | ||||||
|  | 			return -1; | ||||||
|  | 		} | ||||||
|  | 		if ((file = fopen(path, "a+b")) == NULL) { | ||||||
|  | 			PyErr_SetFromErrno(PyExc_OSError); | ||||||
|  | 			return -1; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* Everything checked out; commit.  __init__ can be called again | ||||||
|  | 	   on a live object, so release any handle it already owns. */ | ||||||
|  | 	if (self->file) | ||||||
|  | 		fclose(self->file); | ||||||
|  | 	self->file = file; | ||||||
|  | 	self->file_size = -1; | ||||||
|  | 	self->layout_type = type_lookup[i].layout; | ||||||
|  | 	self->layout_count = (int)count; | ||||||
|  | 	/* 8 bytes of timestamp followed by layout_count values */ | ||||||
|  | 	self->binary_size = 8 + (type_lookup[i].size * self->layout_count); | ||||||
|  |  | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* .close() | ||||||
|  |  | ||||||
|  |    Close the underlying file if one is open; calling it again is a | ||||||
|  |    no-op.  Always returns None. */ | ||||||
|  | static PyObject *Rocket_close(Rocket *self) | ||||||
|  | { | ||||||
|  | 	FILE *fp = self->file; | ||||||
|  |  | ||||||
|  | 	if (fp != NULL) { | ||||||
|  | 		fclose(fp); | ||||||
|  | 		self->file = NULL; | ||||||
|  | 	} | ||||||
|  | 	Py_RETURN_NONE; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* .file_size property | ||||||
|  |  | ||||||
|  |    Returns the size of the open file in bytes.  The size is measured | ||||||
|  |    by seeking to the end (the previous position is restored) and then | ||||||
|  |    cached in self->file_size; a negative cached value means "not | ||||||
|  |    measured yet". | ||||||
|  |  | ||||||
|  |    Raises AttributeError if no file is open and OSError if any of the | ||||||
|  |    seek/tell calls fails or the size does not fit the int cache. */ | ||||||
|  | static PyObject *Rocket_get_file_size(Rocket *self) | ||||||
|  | { | ||||||
|  | 	if (!self->file) { | ||||||
|  | 		PyErr_SetString(PyExc_AttributeError, "no file"); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  | 	if (self->file_size < 0) { | ||||||
|  | 		/* ftell() returns long; keeping the results in long avoids | ||||||
|  | 		   truncating large positions to int. */ | ||||||
|  | 		long oldpos; | ||||||
|  | 		long size; | ||||||
|  | 		if (((oldpos = ftell(self->file)) < 0) || | ||||||
|  | 		    (fseek(self->file, 0, SEEK_END) < 0) || | ||||||
|  | 		    ((size = ftell(self->file)) < 0) || | ||||||
|  | 		    (fseek(self->file, oldpos, SEEK_SET) < 0)) { | ||||||
|  | 			PyErr_SetFromErrno(PyExc_OSError); | ||||||
|  | 			return NULL; | ||||||
|  | 		} | ||||||
|  | 		/* The cache field is an int; refuse to store a size that | ||||||
|  | 		   would wrap instead of silently reporting a wrong one. */ | ||||||
|  | 		if (size > INT_MAX) { | ||||||
|  | 			errno = EOVERFLOW; | ||||||
|  | 			PyErr_SetFromErrno(PyExc_OSError); | ||||||
|  | 			return NULL; | ||||||
|  | 		} | ||||||
|  | 		/* Only cache once every step, including the restore of | ||||||
|  | 		   the old position, has succeeded. */ | ||||||
|  | 		self->file_size = (int)size; | ||||||
|  | 	} | ||||||
|  | 	return PyLong_FromLong(self->file_size); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /**** | ||||||
|  |  * Append from string | ||||||
|  |  */ | ||||||
|  | /* Base-10 wrappers around strtoll / strtoull with the two-argument | ||||||
|  |    shape of strtod, so the value-parsing macro can call any of them | ||||||
|  |    as parsefunc(buf, &endptr).  They return the full-width result: | ||||||
|  |    returning "long int" would truncate on platforms where long is 32 | ||||||
|  |    bits and would turn the unsigned result into a signed one. */ | ||||||
|  | static inline long long strtoll10(const char *nptr, char **endptr) { | ||||||
|  | 	return strtoll(nptr, endptr, 10); | ||||||
|  | } | ||||||
|  | static inline unsigned long long strtoull10(const char *nptr, char **endptr) { | ||||||
|  | 	return strtoull(nptr, endptr, 10); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* .append_string(count, data, offset, linenum, start, end, last_timestamp) | ||||||
|  |  | ||||||
|  |    Parse up to `count` text rows from the NUL-terminated bytes `data`, | ||||||
|  |    starting at byte `offset`, and append each one to the file as an | ||||||
|  |    8-byte timestamp followed by layout_count values of the layout | ||||||
|  |    type.  Lines whose first non-blank character is '#' are skipped. | ||||||
|  |  | ||||||
|  |    Each timestamp must be greater than the previous one (initially | ||||||
|  |    `last_timestamp`) and satisfy start <= ts < end. | ||||||
|  |  | ||||||
|  |    Returns (rows written, new offset into data, last timestamp, | ||||||
|  |    line number).  Raises ParseError for malformed input, Exception | ||||||
|  |    if no file is open, TypeError for an unconfigured layout and | ||||||
|  |    OSError if writing fails.  Data written before an error is not | ||||||
|  |    rolled back. */ | ||||||
|  | static PyObject *Rocket_append_string(Rocket *self, PyObject *args) | ||||||
|  | { | ||||||
|  | 	int count; | ||||||
|  | 	const char *data; | ||||||
|  | 	int offset; | ||||||
|  | 	const char *linestart; | ||||||
|  | 	int linenum; | ||||||
|  | 	long long ll1, ll2, ll3; | ||||||
|  | 	timestamp_t start; | ||||||
|  | 	timestamp_t end; | ||||||
|  | 	timestamp_t last_timestamp; | ||||||
|  |  | ||||||
|  | 	int written = 0; | ||||||
|  | 	char *endptr; | ||||||
|  | 	union8_t t8; | ||||||
|  | 	union16_t t16; | ||||||
|  | 	union32_t t32; | ||||||
|  | 	union64_t t64; | ||||||
|  | 	int i; | ||||||
|  |  | ||||||
|  | 	/* Input data is bytes.  Using 'y#' instead of 'y' might be | ||||||
|  | 	   preferable, but strto* requires the null terminator. */ | ||||||
|  | 	if (!PyArg_ParseTuple(args, "iyiiLLL:append_string", &count, | ||||||
|  | 			      &data, &offset, &linenum, | ||||||
|  | 			      &ll1, &ll2, &ll3)) | ||||||
|  | 		return NULL; | ||||||
|  | 	start = ll1; | ||||||
|  | 	end = ll2; | ||||||
|  | 	last_timestamp = ll3; | ||||||
|  |  | ||||||
|  | 	/* fwrite() on a NULL stream would crash the interpreter */ | ||||||
|  | 	if (!self->file) { | ||||||
|  | 		PyErr_SetString(PyExc_Exception, "no file"); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* 'y' rejects embedded NUL bytes, so strlen() is the length of | ||||||
|  | 	   the whole buffer and bounds the valid offsets. */ | ||||||
|  | 	if (offset < 0 || (size_t)offset > strlen(data)) | ||||||
|  | 		return raise_str(0, 0, ERR_OTHER, "bad offset"); | ||||||
|  |  | ||||||
|  | 	/* The file is about to grow, so the cached size is stale. */ | ||||||
|  | 	self->file_size = -1; | ||||||
|  |  | ||||||
|  | 	/* Skip spaces, but don't skip over a newline.  The cast keeps | ||||||
|  | 	   bytes >= 0x80 from reaching isspace() as negative values. */ | ||||||
|  | #define SKIP_BLANK(buf) do {				\ | ||||||
|  | 	while (isspace((unsigned char)*buf)) {		\ | ||||||
|  | 		if (*buf == '\n')			\ | ||||||
|  | 			break;				\ | ||||||
|  | 		buf++;					\ | ||||||
|  | 	} } while(0) | ||||||
|  |  | ||||||
|  | 	const char *buf = &data[offset]; | ||||||
|  | 	while (written < count && *buf) | ||||||
|  | 	{ | ||||||
|  | 		linestart = buf; | ||||||
|  | 		linenum++; | ||||||
|  |  | ||||||
|  | 		/* Skip leading whitespace and commented lines */ | ||||||
|  | 		SKIP_BLANK(buf); | ||||||
|  | 		if (*buf == '#') { | ||||||
|  | 			while (*buf && *buf != '\n') | ||||||
|  | 				buf++; | ||||||
|  | 			if (*buf) | ||||||
|  | 				buf++; | ||||||
|  | 			continue; | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		/* Extract timestamp */ | ||||||
|  | 		t64.i = strtoll(buf, &endptr, 10); | ||||||
|  | 		if (endptr == buf || !isspace((unsigned char)*endptr)) { | ||||||
|  | 			/* Try parsing as a double instead */ | ||||||
|  | 			t64.d = strtod(buf, &endptr); | ||||||
|  | 			if (endptr == buf) | ||||||
|  | 				goto bad_timestamp; | ||||||
|  | 			if (!isspace((unsigned char)*endptr)) | ||||||
|  | 				goto cant_parse_value; | ||||||
|  | 			t64.i = round(t64.d); | ||||||
|  | 		} | ||||||
|  | 		if (t64.i <= last_timestamp) | ||||||
|  | 			return raise_int(linenum, buf - linestart + 1, | ||||||
|  | 					 ERR_NON_MONOTONIC, t64.i); | ||||||
|  | 		last_timestamp = t64.i; | ||||||
|  | 		if (t64.i < start || t64.i >= end) | ||||||
|  | 			return raise_int(linenum, buf - linestart + 1, | ||||||
|  | 					 ERR_OUT_OF_INTERVAL, t64.i); | ||||||
|  | 		/* byte-swap to the little-endian on-disk form */ | ||||||
|  | 		t64.u = le64toh(t64.u); | ||||||
|  | 		if (fwrite(&t64.u, 8, 1, self->file) != 1) | ||||||
|  | 			goto err; | ||||||
|  | 		buf = endptr; | ||||||
|  |  | ||||||
|  | 		/* Parse all values in the line */ | ||||||
|  | 		switch (self->layout_type) { | ||||||
|  | #define CS(type, parsefunc, parsetype, realtype, disktype, letoh, bytes) \ | ||||||
|  | 		case LAYOUT_TYPE_##type:				\ | ||||||
|  | 			/* parse and write in a loop */			\ | ||||||
|  | 			for (i = 0; i < self->layout_count; i++) {	\ | ||||||
|  | 				/* skip non-newlines */			\ | ||||||
|  | 				SKIP_BLANK(buf);			\ | ||||||
|  | 				/* a newline or the end of the data	\ | ||||||
|  | 				   here means the row is short; without	\ | ||||||
|  | 				   the '\0' test the missing values	\ | ||||||
|  | 				   would be stored as zeros */		\ | ||||||
|  | 				if (*buf == '\n' || *buf == '\0')	\ | ||||||
|  | 					goto wrong_number_of_values;	\ | ||||||
|  | 				/* parse number */			\ | ||||||
|  | 				parsetype = parsefunc(buf, &endptr);	\ | ||||||
|  | 				if (*endptr &&				\ | ||||||
|  | 				    !isspace((unsigned char)*endptr))	\ | ||||||
|  | 					goto cant_parse_value;		\ | ||||||
|  | 				/* check limits */			\ | ||||||
|  | 				if (type##_MIN != type##_MAX &&		\ | ||||||
|  | 				    (parsetype < type##_MIN ||		\ | ||||||
|  | 				     parsetype > type##_MAX))		\ | ||||||
|  | 					goto value_out_of_range;	\ | ||||||
|  | 				/* convert to disk representation */	\ | ||||||
|  | 				realtype = parsetype;			\ | ||||||
|  | 				disktype = letoh(disktype);		\ | ||||||
|  | 				/* write it */				\ | ||||||
|  | 				if (fwrite(&disktype, bytes,		\ | ||||||
|  | 					   1, self->file) != 1)		\ | ||||||
|  | 					goto err;			\ | ||||||
|  | 				/* advance buf */			\ | ||||||
|  | 				buf = endptr;				\ | ||||||
|  | 			}						\ | ||||||
|  | 			/* Skip trailing whitespace and comments */	\ | ||||||
|  | 			SKIP_BLANK(buf);				\ | ||||||
|  | 			if (*buf == '#')				\ | ||||||
|  | 				while (*buf && *buf != '\n')		\ | ||||||
|  | 					buf++;				\ | ||||||
|  | 			if (*buf == '\n')				\ | ||||||
|  | 				buf++;					\ | ||||||
|  | 			else if (*buf != '\0')				\ | ||||||
|  | 				goto extra_data_on_line;		\ | ||||||
|  | 			break | ||||||
|  |  | ||||||
|  | 			CS(INT8,   strtoll10,  t64.i, t8.i,  t8.u,         , 1); | ||||||
|  | 			CS(UINT8,  strtoull10, t64.u, t8.u,  t8.u,         , 1); | ||||||
|  | 			CS(INT16,  strtoll10,  t64.i, t16.i, t16.u, le16toh, 2); | ||||||
|  | 			CS(UINT16, strtoull10, t64.u, t16.u, t16.u, le16toh, 2); | ||||||
|  | 			CS(INT32,  strtoll10,  t64.i, t32.i, t32.u, le32toh, 4); | ||||||
|  | 			CS(UINT32, strtoull10, t64.u, t32.u, t32.u, le32toh, 4); | ||||||
|  | 			CS(INT64,  strtoll10,  t64.i, t64.i, t64.u, le64toh, 8); | ||||||
|  | 			CS(UINT64, strtoull10, t64.u, t64.u, t64.u, le64toh, 8); | ||||||
|  | 			CS(FLOAT32, strtod,   t64.d, t32.f, t32.u, le32toh, 4); | ||||||
|  | 			CS(FLOAT64, strtod,   t64.d, t64.d, t64.u, le64toh, 8); | ||||||
|  | #undef CS | ||||||
|  | 		default: | ||||||
|  | 			PyErr_SetString(PyExc_TypeError, "unknown type"); | ||||||
|  | 			return NULL; | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		/* Done this line */ | ||||||
|  | 		written++; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* A failed flush means buffered rows did not reach the file */ | ||||||
|  | 	if (fflush(self->file) != 0) | ||||||
|  | 		goto err; | ||||||
|  |  | ||||||
|  | 	/* Build return value and return */ | ||||||
|  | 	offset = buf - data; | ||||||
|  | 	PyObject *o; | ||||||
|  | 	o = Py_BuildValue("(iiLi)", written, offset, | ||||||
|  | 			  (long long)last_timestamp, linenum); | ||||||
|  | 	return o; | ||||||
|  | err: | ||||||
|  | 	PyErr_SetFromErrno(PyExc_OSError); | ||||||
|  | 	return NULL; | ||||||
|  | bad_timestamp: | ||||||
|  | 	return raise_str(linenum, buf - linestart + 1, | ||||||
|  | 			 ERR_OTHER, "bad timestamp"); | ||||||
|  | cant_parse_value: | ||||||
|  | 	return raise_str(linenum, buf - linestart + 1, | ||||||
|  | 			 ERR_OTHER, "can't parse value"); | ||||||
|  | wrong_number_of_values: | ||||||
|  | 	return raise_str(linenum, buf - linestart + 1, | ||||||
|  | 			 ERR_OTHER, "wrong number of values"); | ||||||
|  | value_out_of_range: | ||||||
|  | 	return raise_str(linenum, buf - linestart + 1, | ||||||
|  | 			 ERR_OTHER, "value out of range"); | ||||||
|  | extra_data_on_line: | ||||||
|  | 	return raise_str(linenum, buf - linestart + 1, | ||||||
|  | 			 ERR_OTHER, "extra data on line"); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /**** | ||||||
|  |  * Append from binary data | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /* .append_binary(count, data, offset, linenum, start, end, last_timestamp) | ||||||
|  |  | ||||||
|  |    Append up to `count` whole rows of already-encoded little-endian | ||||||
|  |    binary data, taken from `data` starting at byte `offset`.  Every | ||||||
|  |    row's timestamp is checked before anything is written: each must | ||||||
|  |    be greater than the previous one (initially `last_timestamp`) and | ||||||
|  |    satisfy start <= ts < end. | ||||||
|  |  | ||||||
|  |    Returns (rows written, new offset into data, last timestamp, | ||||||
|  |    linenum); linenum is passed through unchanged.  Raises ParseError | ||||||
|  |    for a bad offset/count or timestamp, Exception if no file is | ||||||
|  |    open, ValueError for an unusable binary_size and OSError if | ||||||
|  |    writing fails. */ | ||||||
|  | static PyObject *Rocket_append_binary(Rocket *self, PyObject *args) | ||||||
|  | { | ||||||
|  | 	int count; | ||||||
|  | 	const uint8_t *data; | ||||||
|  | 	int data_len; | ||||||
|  | 	int linenum; | ||||||
|  | 	int offset; | ||||||
|  | 	long long ll1, ll2, ll3; | ||||||
|  | 	timestamp_t start; | ||||||
|  | 	timestamp_t end; | ||||||
|  | 	timestamp_t last_timestamp; | ||||||
|  |  | ||||||
|  | 	/* NOTE(review): 'y#' with an int length expects the pre- | ||||||
|  | 	   PY_SSIZE_T_CLEAN calling convention; newer CPython versions | ||||||
|  | 	   want the macro defined before Python.h and a Py_ssize_t | ||||||
|  | 	   here.  Left unchanged because it needs a file-wide change. */ | ||||||
|  | 	if (!PyArg_ParseTuple(args, "iy#iiLLL:append_binary", | ||||||
|  | 			      &count, &data, &data_len, &offset, | ||||||
|  | 			      &linenum, &ll1, &ll2, &ll3)) | ||||||
|  | 		return NULL; | ||||||
|  | 	start = ll1; | ||||||
|  | 	end = ll2; | ||||||
|  | 	last_timestamp = ll3; | ||||||
|  |  | ||||||
|  | 	/* fwrite() on a NULL stream would crash the interpreter */ | ||||||
|  | 	if (!self->file) { | ||||||
|  | 		PyErr_SetString(PyExc_Exception, "no file"); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* binary_size is exposed as a writable member.  It is used as | ||||||
|  | 	   a divisor and as the row stride, and every row must hold at | ||||||
|  | 	   least the 8-byte timestamp read below. */ | ||||||
|  | 	if (self->binary_size < 8) { | ||||||
|  | 		PyErr_SetString(PyExc_ValueError, "invalid binary_size"); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* A negative count would become a huge size_t in fwrite() */ | ||||||
|  | 	if (count < 0) | ||||||
|  | 		return raise_str(0, 0, ERR_OTHER, "bad count"); | ||||||
|  |  | ||||||
|  | 	/* Advance to offset */ | ||||||
|  | 	if (offset < 0 || offset > data_len) | ||||||
|  | 		return raise_str(0, 0, ERR_OTHER, "bad offset"); | ||||||
|  | 	data += offset; | ||||||
|  | 	data_len -= offset; | ||||||
|  |  | ||||||
|  | 	/* Figure out max number of rows to insert */ | ||||||
|  | 	int rows = data_len / self->binary_size; | ||||||
|  | 	if (rows > count) | ||||||
|  | 		rows = count; | ||||||
|  |  | ||||||
|  | 	/* Check timestamps */ | ||||||
|  | 	timestamp_t ts; | ||||||
|  | 	int i; | ||||||
|  | 	for (i = 0; i < rows; i++) { | ||||||
|  | 		/* Read raw timestamp, byteswap if needed */ | ||||||
|  | 		memcpy(&ts, &data[i * self->binary_size], 8); | ||||||
|  | 		ts = le64toh(ts); | ||||||
|  |  | ||||||
|  | 		/* Check limits */ | ||||||
|  | 		if (ts <= last_timestamp) | ||||||
|  | 			return raise_int(i, 0, ERR_NON_MONOTONIC, ts); | ||||||
|  | 		last_timestamp = ts; | ||||||
|  | 		if (ts < start || ts >= end) | ||||||
|  | 			return raise_int(i, 0, ERR_OUT_OF_INTERVAL, ts); | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* The file is about to grow, so the cached size is stale. */ | ||||||
|  | 	self->file_size = -1; | ||||||
|  |  | ||||||
|  | 	/* Write binary data */ | ||||||
|  | 	if (fwrite(data, self->binary_size, rows, self->file) != (size_t)rows) { | ||||||
|  | 		PyErr_SetFromErrno(PyExc_OSError); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  | 	if (fflush(self->file) != 0) { | ||||||
|  | 		PyErr_SetFromErrno(PyExc_OSError); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* Build return value and return */ | ||||||
|  | 	PyObject *o; | ||||||
|  | 	o = Py_BuildValue("(iiLi)", rows, offset + rows * self->binary_size, | ||||||
|  | 			  (long long)last_timestamp, linenum); | ||||||
|  | 	return o; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /**** | ||||||
|  |  * Extract to binary bytes object containing ASCII text-formatted data | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /* .extract_string(offset, count) | ||||||
|  |  | ||||||
|  |    Seek to byte `offset` in the file and read `count` rows.  Each row | ||||||
|  |    becomes one text line: the timestamp, then every value preceded by | ||||||
|  |    a single space, then '\n'.  Returns the text as a bytes object. | ||||||
|  |  | ||||||
|  |    Raises Exception if no file is open, TypeError for an unconfigured | ||||||
|  |    layout, and OSError when seeking, reading, formatting or growing | ||||||
|  |    the buffer fails. | ||||||
|  |    NOTE(review): a short read at end of file also lands on the OSError | ||||||
|  |    path, where errno may not describe the problem -- confirm callers | ||||||
|  |    never ask for rows past the end. */ | ||||||
|  | static PyObject *Rocket_extract_string(Rocket *self, PyObject *args) | ||||||
|  | { | ||||||
|  | 	long count; | ||||||
|  | 	long offset; | ||||||
|  |  | ||||||
|  | 	if (!PyArg_ParseTuple(args, "ll", &offset, &count)) | ||||||
|  | 		return NULL; | ||||||
|  | 	if (!self->file) { | ||||||
|  | 		PyErr_SetString(PyExc_Exception, "no file"); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  | 	/* Seek to target location */ | ||||||
|  | 	if (fseek(self->file, offset, SEEK_SET) < 0) { | ||||||
|  | 		PyErr_SetFromErrno(PyExc_OSError); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* str is the growing output buffer: len bytes used out of | ||||||
|  | 	   len_alloc allocated */ | ||||||
|  | 	char *str = NULL, *new; | ||||||
|  | 	long len_alloc = 0; | ||||||
|  | 	long len = 0; | ||||||
|  | 	int ret; | ||||||
|  |  | ||||||
|  | 	/* min space free in string (and the maximum length of one | ||||||
|  | 	   line); this is generous */ | ||||||
|  | 	const int min_free = 32 * MAX_LAYOUT_COUNT; | ||||||
|  |  | ||||||
|  | 	/* how much to allocate at once */ | ||||||
|  | 	const int alloc_size = 1048576; | ||||||
|  |  | ||||||
|  | 	int row, i; | ||||||
|  | 	union8_t t8; | ||||||
|  | 	union16_t t16; | ||||||
|  | 	union32_t t32; | ||||||
|  | 	union64_t t64; | ||||||
|  | 	for (row = 0; row < count; row++) { | ||||||
|  | 		/* Make sure there's space for a line */ | ||||||
|  | 		if ((len_alloc - len) < min_free) { | ||||||
|  | 			/* grow by 1 meg at a time */ | ||||||
|  | 			len_alloc += alloc_size; | ||||||
|  | 			new = realloc(str, len_alloc); | ||||||
|  | 			if (new == NULL) | ||||||
|  | 				goto err; | ||||||
|  | 			str = new; | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		/* Read and print timestamp */ | ||||||
|  | 		if (fread(&t64.u, 8, 1, self->file) != 1) | ||||||
|  | 			goto err; | ||||||
|  | 		t64.u = le64toh(t64.u); | ||||||
|  | 		ret = sprintf(&str[len], "%" PRId64, t64.i); | ||||||
|  | 		if (ret <= 0) | ||||||
|  | 			goto err; | ||||||
|  | 		len += ret; | ||||||
|  |  | ||||||
|  | 		/* Read and print values.  CASE expands to one switch case | ||||||
|  | 		   per layout type: it reads `bytes` bytes into disktype, | ||||||
|  | 		   byte-swaps it with letoh (empty for 1-byte types) and | ||||||
|  | 		   prints fmttype, the same storage viewed through the | ||||||
|  | 		   union member that matches fmt. */ | ||||||
|  | 		switch (self->layout_type) { | ||||||
|  | #define CASE(type, fmt, fmttype, disktype, letoh, bytes)		\ | ||||||
|  | 		case LAYOUT_TYPE_##type:				\ | ||||||
|  | 			/* read and format in a loop */			\ | ||||||
|  | 			for (i = 0; i < self->layout_count; i++) {	\ | ||||||
|  | 				if (fread(&disktype, bytes,		\ | ||||||
|  | 					  1, self->file) != 1)		\ | ||||||
|  | 					goto err;			\ | ||||||
|  | 				disktype = letoh(disktype);		\ | ||||||
|  | 				ret = sprintf(&str[len], " " fmt,	\ | ||||||
|  | 					      fmttype);			\ | ||||||
|  | 				if (ret <= 0)				\ | ||||||
|  | 					goto err;			\ | ||||||
|  | 				len += ret;				\ | ||||||
|  | 			}						\ | ||||||
|  | 			break | ||||||
|  | 			CASE(INT8,   "%" PRId8,  t8.i,  t8.u,         , 1); | ||||||
|  | 			CASE(UINT8,  "%" PRIu8,  t8.u,  t8.u,         , 1); | ||||||
|  | 			CASE(INT16,  "%" PRId16, t16.i, t16.u, le16toh, 2); | ||||||
|  | 			CASE(UINT16, "%" PRIu16, t16.u, t16.u, le16toh, 2); | ||||||
|  | 			CASE(INT32,  "%" PRId32, t32.i, t32.u, le32toh, 4); | ||||||
|  | 			CASE(UINT32, "%" PRIu32, t32.u, t32.u, le32toh, 4); | ||||||
|  | 			CASE(INT64,  "%" PRId64, t64.i, t64.u, le64toh, 8); | ||||||
|  | 			CASE(UINT64, "%" PRIu64, t64.u, t64.u, le64toh, 8); | ||||||
|  | 			/* These next two are a bit debatable.  floats | ||||||
|  | 			   are 6-9 significant figures, so we print 7. | ||||||
|  | 			   Doubles are 15-19, so we print 17.  This is | ||||||
|  | 			   similar to the old prep format for float32. | ||||||
|  | 			*/ | ||||||
|  | 			CASE(FLOAT32, "%.6e",  t32.f, t32.u, le32toh, 4); | ||||||
|  | 			CASE(FLOAT64, "%.16e", t64.d, t64.u, le64toh, 8); | ||||||
|  | #undef CASE | ||||||
|  | 		default: | ||||||
|  | 			PyErr_SetString(PyExc_TypeError, "unknown type"); | ||||||
|  | 			if (str) free(str); | ||||||
|  | 			return NULL; | ||||||
|  | 		} | ||||||
|  | 		/* terminate the line; min_free leaves room for this byte */ | ||||||
|  | 		str[len++] = '\n'; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* Copy the text into a bytes object and drop the scratch buffer */ | ||||||
|  | 	PyObject *pystr = PyBytes_FromStringAndSize(str, len); | ||||||
|  | 	free(str); | ||||||
|  | 	return pystr; | ||||||
|  | err: | ||||||
|  | 	if (str) free(str); | ||||||
|  | 	PyErr_SetFromErrno(PyExc_OSError); | ||||||
|  | 	return NULL; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /**** | ||||||
|  |  * Extract to binary bytes object containing raw little-endian binary data | ||||||
|  |  */ | ||||||
|  | /* .extract_binary(offset, count) | ||||||
|  |  | ||||||
|  |    Seek to byte `offset` in the file and return `count` rows | ||||||
|  |    (count * binary_size bytes) exactly as stored, as a bytes object. | ||||||
|  |  | ||||||
|  |    Raises Exception if no file is open, ValueError for a negative | ||||||
|  |    count or an unconfigured row size, OverflowError if the requested | ||||||
|  |    size cannot be represented, and OSError if seeking, allocating or | ||||||
|  |    reading fails (including a read that hits end of file early). */ | ||||||
|  | static PyObject *Rocket_extract_binary(Rocket *self, PyObject *args) | ||||||
|  | { | ||||||
|  | 	long count; | ||||||
|  | 	long offset; | ||||||
|  |  | ||||||
|  | 	if (!PyArg_ParseTuple(args, "ll", &offset, &count)) | ||||||
|  | 		return NULL; | ||||||
|  | 	if (!self->file) { | ||||||
|  | 		PyErr_SetString(PyExc_Exception, "no file"); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  | 	if (count < 0) { | ||||||
|  | 		PyErr_SetString(PyExc_ValueError, | ||||||
|  | 				"count must not be negative"); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  | 	if (self->binary_size <= 0) { | ||||||
|  | 		PyErr_SetString(PyExc_ValueError, "invalid binary_size"); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  | 	/* The byte count used to be computed in an int, which overflows | ||||||
|  | 	   for large requests; check it against what a bytes object can | ||||||
|  | 	   hold instead. */ | ||||||
|  | 	if ((unsigned long long)count > | ||||||
|  | 	    (unsigned long long)(PY_SSIZE_T_MAX / self->binary_size)) { | ||||||
|  | 		PyErr_SetString(PyExc_OverflowError, "count too large"); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  | 	/* Seek to target location */ | ||||||
|  | 	if (fseek(self->file, offset, SEEK_SET) < 0) { | ||||||
|  | 		PyErr_SetFromErrno(PyExc_OSError); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	uint8_t *str; | ||||||
|  | 	size_t len = (size_t)count * (size_t)self->binary_size; | ||||||
|  | 	/* malloc(0) may legitimately return NULL; always ask for at | ||||||
|  | 	   least one byte so count == 0 is not reported as a failure */ | ||||||
|  | 	str = malloc(len ? len : 1); | ||||||
|  | 	if (str == NULL) { | ||||||
|  | 		PyErr_SetFromErrno(PyExc_OSError); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* Data in the file is already in the desired little-endian | ||||||
|  | 	   binary format, so just read it directly. */ | ||||||
|  | 	if (fread(str, self->binary_size, count, self->file) != (size_t)count) { | ||||||
|  | 		free(str); | ||||||
|  | 		PyErr_SetFromErrno(PyExc_OSError); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	PyObject *pystr = PyBytes_FromStringAndSize((char *)str, | ||||||
|  | 						    (Py_ssize_t)len); | ||||||
|  | 	free(str); | ||||||
|  | 	return pystr; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /**** | ||||||
|  |  * Extract timestamp | ||||||
|  |  */ | ||||||
|  | /* .extract_timestamp(offset) | ||||||
|  |  | ||||||
|  |    Read the 8-byte little-endian timestamp stored at byte `offset` | ||||||
|  |    of the file and return it as a Python int.  Raises Exception if | ||||||
|  |    no file is open and OSError if the seek or the read fails. */ | ||||||
|  | static PyObject *Rocket_extract_timestamp(Rocket *self, PyObject *args) | ||||||
|  | { | ||||||
|  | 	long offset; | ||||||
|  | 	union64_t raw; | ||||||
|  |  | ||||||
|  | 	if (!PyArg_ParseTuple(args, "l", &offset)) | ||||||
|  | 		return NULL; | ||||||
|  | 	if (self->file == NULL) { | ||||||
|  | 		PyErr_SetString(PyExc_Exception, "no file"); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* Position the stream, then pull in the raw 8 bytes */ | ||||||
|  | 	if (fseek(self->file, offset, SEEK_SET) < 0) | ||||||
|  | 		goto io_error; | ||||||
|  | 	if (fread(&raw.u, 8, 1, self->file) != 1) | ||||||
|  | 		goto io_error; | ||||||
|  |  | ||||||
|  | 	/* Swap to host byte order and hand back the signed view */ | ||||||
|  | 	raw.u = le64toh(raw.u); | ||||||
|  | 	return Py_BuildValue("L", (long long)raw.i); | ||||||
|  |  | ||||||
|  | io_error: | ||||||
|  | 	PyErr_SetFromErrno(PyExc_OSError); | ||||||
|  | 	return NULL; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /**** | ||||||
|  |  * Module and type setup | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /* Computed attributes: file_size is read-only (no setter) and is | ||||||
|  |    produced by Rocket_get_file_size. */ | ||||||
|  | static PyGetSetDef Rocket_getsetters[] = { | ||||||
|  | 	{ "file_size", (getter)Rocket_get_file_size, NULL, | ||||||
|  | 	  "file size in bytes", NULL }, | ||||||
|  | 	{ NULL }, | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | /* Struct fields exposed directly as attributes.  binary_size is | ||||||
|  |    declared with flags 0, i.e. it is not marked read-only. */ | ||||||
|  | static PyMemberDef Rocket_members[] = { | ||||||
|  | 	{ "binary_size", T_INT, offsetof(Rocket, binary_size), 0, | ||||||
|  | 	  "binary size per row" }, | ||||||
|  | 	{ NULL }, | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | /* Method table for Rocket objects.  Each entry gives the Python | ||||||
|  |    name, the C implementation, the calling convention and the | ||||||
|  |    docstring; the table ends with a NULL entry. */ | ||||||
|  | static PyMethodDef Rocket_methods[] = { | ||||||
|  | 	{ "close", | ||||||
|  |           (PyCFunction)Rocket_close, METH_NOARGS, | ||||||
|  | 	  "close(self)\n\n" | ||||||
|  | 	  "Close file handle" }, | ||||||
|  |  | ||||||
|  | 	{ "append_string", | ||||||
|  |           (PyCFunction)Rocket_append_string, METH_VARARGS, | ||||||
|  | 	  "append_string(self, count, data, offset, line, start, end, ts)\n\n" | ||||||
|  |           "Parse string and append data.\n" | ||||||
|  | 	  "\n" | ||||||
|  | 	  "  count: maximum number of rows to add\n" | ||||||
|  |           "  data: string data\n" | ||||||
|  |           "  offset: byte offset into data to start parsing\n" | ||||||
|  |           "  line: current line number of data\n" | ||||||
|  |           "  start: starting timestamp for interval\n" | ||||||
|  |           "  end: end timestamp for interval\n" | ||||||
|  |           "  ts: last timestamp that was previously parsed\n" | ||||||
|  | 	  "\n" | ||||||
|  | 	  "Raises ParseError if timestamps are non-monotonic, outside\n" | ||||||
|  | 	  "the start/end interval etc.\n" | ||||||
|  | 	  "\n" | ||||||
|  |           "On success, return a tuple:\n" | ||||||
|  |           "  added_rows: how many rows were added from the file\n" | ||||||
|  |           "  data_offset: current offset into the data string\n" | ||||||
|  |           "  last_timestamp: last timestamp we parsed\n" | ||||||
|  |           "  linenum: current line number" }, | ||||||
|  |  | ||||||
|  | 	{ "append_binary", | ||||||
|  | 	  (PyCFunction)Rocket_append_binary, METH_VARARGS, | ||||||
|  | 	  "append_binary(self, count, data, offset, line, start, end, ts)\n\n" | ||||||
|  |           "Append binary data, which must match the data layout.\n" | ||||||
|  | 	  "\n" | ||||||
|  | 	  "  count: maximum number of rows to add\n" | ||||||
|  |           "  data: binary data\n" | ||||||
|  |           "  offset: byte offset into data to start adding\n" | ||||||
|  |           "  line: current line number (unused)\n" | ||||||
|  |           "  start: starting timestamp for interval\n" | ||||||
|  |           "  end: end timestamp for interval\n" | ||||||
|  |           "  ts: last timestamp that was previously parsed\n" | ||||||
|  | 	  "\n" | ||||||
|  | 	  "Raises ParseError if timestamps are non-monotonic, outside\n" | ||||||
|  | 	  "the start/end interval etc.\n" | ||||||
|  | 	  "\n" | ||||||
|  |           "On success, return a tuple:\n" | ||||||
|  |           "  added_rows: how many rows were added from the file\n" | ||||||
|  |           "  data_offset: current offset into the data string\n" | ||||||
|  |           "  last_timestamp: last timestamp we parsed\n" | ||||||
|  |           "  linenum: current line number (copied from argument)" }, | ||||||
|  |  | ||||||
|  | 	{ "extract_string", | ||||||
|  |           (PyCFunction)Rocket_extract_string, METH_VARARGS, | ||||||
|  | 	  "extract_string(self, offset, count)\n\n" | ||||||
|  | 	  "Extract count rows of data from the file at offset offset.\n" | ||||||
|  | 	  "Return an ascii formatted string according to the layout" }, | ||||||
|  |  | ||||||
|  | 	{ "extract_binary", | ||||||
|  | 	  (PyCFunction)Rocket_extract_binary, METH_VARARGS, | ||||||
|  | 	  "extract_binary(self, offset, count)\n\n" | ||||||
|  | 	  "Extract count rows of data from the file at offset offset.\n" | ||||||
|  | 	  "Return a raw binary string of data matching the data layout." }, | ||||||
|  |  | ||||||
|  | 	{ "extract_timestamp", | ||||||
|  | 	  (PyCFunction)Rocket_extract_timestamp, METH_VARARGS, | ||||||
|  | 	  "extract_timestamp(self, offset)\n\n" | ||||||
|  | 	  "Extract a single timestamp from the file" }, | ||||||
|  |  | ||||||
|  | 	{ NULL }, | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | /* Type object for rocket.Rocket.  Py_TPFLAGS_BASETYPE allows the | ||||||
|  |    type to be subclassed from Python; the remaining slots point at | ||||||
|  |    the functions and tables defined earlier in this file. */ | ||||||
|  | static PyTypeObject RocketType = { | ||||||
|  | 	PyVarObject_HEAD_INIT(NULL, 0) | ||||||
|  |  | ||||||
|  | 	.tp_name	= "rocket.Rocket", | ||||||
|  | 	.tp_basicsize	= sizeof(Rocket), | ||||||
|  | 	.tp_flags	= Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, | ||||||
|  |  | ||||||
|  | 	.tp_new		= Rocket_new, | ||||||
|  | 	.tp_dealloc	= (destructor)Rocket_dealloc, | ||||||
|  | 	.tp_init	= (initproc)Rocket_init, | ||||||
|  | 	.tp_methods	= Rocket_methods, | ||||||
|  | 	.tp_members	= Rocket_members, | ||||||
|  | 	.tp_getset	= Rocket_getsetters, | ||||||
|  |  | ||||||
|  | 	.tp_doc		= ("rocket.Rocket(layout, file)\n\n" | ||||||
|  | 			   "C implementation of the \"rocket\" data parsing\n" | ||||||
|  | 			   "interface, which translates between the binary\n" | ||||||
|  | 			   "format on disk and the ASCII or Python list\n" | ||||||
|  | 			   "format used when communicating with the rest of\n" | ||||||
|  | 			   "the system.") | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | /* The module has no module-level functions; everything is exposed | ||||||
|  |    through the Rocket type. */ | ||||||
|  | static PyMethodDef module_methods[] = { | ||||||
|  | 	{ NULL }, | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | /* Module definition.  m_name matches the name used by PyInit_rocket | ||||||
|  |    and by the "rocket." prefix of the type and exception names (it | ||||||
|  |    was previously misspelled "rocker").  m_size of -1 means the | ||||||
|  |    module keeps its state in C globals such as ParseError. */ | ||||||
|  | static struct PyModuleDef moduledef = { | ||||||
|  | 	PyModuleDef_HEAD_INIT, | ||||||
|  | 	.m_name        = "rocket", | ||||||
|  | 	.m_doc         = "Rocket data parsing and formatting module", | ||||||
|  | 	.m_size        = -1, | ||||||
|  | 	.m_methods     = module_methods, | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | /* Module entry point: finalize the Rocket type, create the module | ||||||
|  |    and register the Rocket type, the ParseError exception and the | ||||||
|  |    ERR_* codes on it.  Returns the module, or NULL with an exception | ||||||
|  |    set if any step fails. */ | ||||||
|  | PyMODINIT_FUNC PyInit_rocket(void) | ||||||
|  | { | ||||||
|  | 	PyObject *module; | ||||||
|  |  | ||||||
|  | 	/* RocketType's initializer already names Rocket_new as tp_new; | ||||||
|  | 	   it is no longer overwritten with PyType_GenericNew here, so | ||||||
|  | 	   new objects get the defaults Rocket_new assigns. */ | ||||||
|  | 	if (PyType_Ready(&RocketType) < 0) | ||||||
|  | 		return NULL; | ||||||
|  |  | ||||||
|  | 	module = PyModule_Create(&moduledef); | ||||||
|  | 	if (module == NULL) | ||||||
|  | 		return NULL; | ||||||
|  |  | ||||||
|  | 	/* PyModule_AddObject steals a reference only on success, so the | ||||||
|  | 	   extra reference has to be dropped by hand when it fails. */ | ||||||
|  | 	Py_INCREF(&RocketType); | ||||||
|  | 	if (PyModule_AddObject(module, "Rocket", | ||||||
|  | 			       (PyObject *)&RocketType) < 0) { | ||||||
|  | 		Py_DECREF(&RocketType); | ||||||
|  | 		Py_DECREF(module); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	ParseError = PyErr_NewException("rocket.ParseError", NULL, NULL); | ||||||
|  | 	if (ParseError == NULL) { | ||||||
|  | 		Py_DECREF(module); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  | 	/* One reference stays in the C global, one goes to the module */ | ||||||
|  | 	Py_INCREF(ParseError); | ||||||
|  | 	if (PyModule_AddObject(module, "ParseError", ParseError) < 0) { | ||||||
|  | 		Py_DECREF(ParseError); | ||||||
|  | 		Py_CLEAR(ParseError); | ||||||
|  | 		Py_DECREF(module); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  | 	add_parseerror_codes(module); | ||||||
|  |  | ||||||
|  | 	return module; | ||||||
|  | } | ||||||
							
								
								
									
										546
									
								
								nilmdb/server/server.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										546
									
								
								nilmdb/server/server.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,546 @@ | |||||||
|  | """CherryPy-based server for accessing NILM database via HTTP""" | ||||||
|  |  | ||||||
|  | import os | ||||||
|  | import json | ||||||
|  | import socket | ||||||
|  | import traceback | ||||||
|  |  | ||||||
|  | import psutil | ||||||
|  | import cherrypy | ||||||
|  |  | ||||||
|  | import nilmdb.server | ||||||
|  | from nilmdb.utils.printf import sprintf | ||||||
|  | from nilmdb.server.errors import NilmDBError | ||||||
|  | from nilmdb.utils.time import string_to_timestamp | ||||||
|  |  | ||||||
|  | from nilmdb.server.serverutil import ( | ||||||
|  |     chunked_response, | ||||||
|  |     response_type, | ||||||
|  |     exception_to_httperror, | ||||||
|  |     CORS_allow, | ||||||
|  |     json_to_request_params, | ||||||
|  |     json_error_page, | ||||||
|  |     cherrypy_start, | ||||||
|  |     cherrypy_stop, | ||||||
|  |     bool_param, | ||||||
|  |     ) | ||||||
|  |  | ||||||
|  | # Add CORS_allow tool | ||||||
|  | cherrypy.tools.CORS_allow = cherrypy.Tool('on_start_resource', CORS_allow) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class NilmApp(): | ||||||
|  |     def __init__(self, db): | ||||||
|  |         self.db = db | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # CherryPy apps | ||||||
|  | class Root(NilmApp): | ||||||
|  |     """Root application for NILM database""" | ||||||
|  |     # / | ||||||
|  |     @cherrypy.expose | ||||||
|  |     def index(self): | ||||||
|  |         cherrypy.response.headers['Content-Type'] = 'text/plain' | ||||||
|  |         msg = sprintf("This is NilmDB version %s, running on host %s.\n", | ||||||
|  |                       nilmdb.__version__, socket.getfqdn()) | ||||||
|  |         return msg | ||||||
|  |  | ||||||
|  |     # /favicon.ico | ||||||
|  |     @cherrypy.expose | ||||||
|  |     def favicon_ico(self): | ||||||
|  |         raise cherrypy.NotFound() | ||||||
|  |  | ||||||
|  |     # /version | ||||||
|  |     @cherrypy.expose | ||||||
|  |     @cherrypy.tools.json_out() | ||||||
|  |     def version(self): | ||||||
|  |         return nilmdb.__version__ | ||||||
|  |  | ||||||
|  |     # /dbinfo | ||||||
|  |     @cherrypy.expose | ||||||
|  |     @cherrypy.tools.json_out() | ||||||
|  |     def dbinfo(self): | ||||||
|  |         """Return a dictionary with the database path, | ||||||
|  |         size of the database in bytes, and free disk space in bytes""" | ||||||
|  |         path = self.db.get_basepath() | ||||||
|  |         usage = psutil.disk_usage(path) | ||||||
|  |         dbsize = nilmdb.utils.du(path) | ||||||
|  |         return { | ||||||
|  |             "path": path, | ||||||
|  |             "size": dbsize, | ||||||
|  |             "other": max(usage.used - dbsize, 0), | ||||||
|  |             "reserved": max(usage.total - usage.used - usage.free, 0), | ||||||
|  |             "free": usage.free | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class Stream(NilmApp): | ||||||
|  |     """Stream-specific operations""" | ||||||
|  |  | ||||||
|  |     # Helpers | ||||||
|  |     def _get_times(self, start_param, end_param): | ||||||
|  |         (start, end) = (None, None) | ||||||
|  |         try: | ||||||
|  |             if start_param is not None: | ||||||
|  |                 start = string_to_timestamp(start_param) | ||||||
|  |         except Exception: | ||||||
|  |             raise cherrypy.HTTPError("400 Bad Request", sprintf( | ||||||
|  |                 "invalid start (%s): must be a numeric timestamp", | ||||||
|  |                 start_param)) | ||||||
|  |         try: | ||||||
|  |             if end_param is not None: | ||||||
|  |                 end = string_to_timestamp(end_param) | ||||||
|  |         except Exception: | ||||||
|  |             raise cherrypy.HTTPError("400 Bad Request", sprintf( | ||||||
|  |                 "invalid end (%s): must be a numeric timestamp", end_param)) | ||||||
|  |         if start is not None and end is not None: | ||||||
|  |             if start >= end: | ||||||
|  |                 raise cherrypy.HTTPError( | ||||||
|  |                     "400 Bad Request", | ||||||
|  |                     sprintf("start must precede end (%s >= %s)", | ||||||
|  |                             start_param, end_param)) | ||||||
|  |         return (start, end) | ||||||
|  |  | ||||||
|  |     # /stream/list | ||||||
|  |     # /stream/list?layout=float32_8 | ||||||
|  |     # /stream/list?path=/newton/prep&extended=1 | ||||||
|  |     @cherrypy.expose | ||||||
|  |     @cherrypy.tools.json_out() | ||||||
|  |     def list(self, path=None, layout=None, extended=None): | ||||||
|  |         """List all streams in the database.  With optional path or | ||||||
|  |         layout parameter, just list streams that match the given path | ||||||
|  |         or layout. | ||||||
|  |  | ||||||
|  |         If extended is missing or zero, returns a list of lists | ||||||
|  |         containing the path and layout: [ path, layout ] | ||||||
|  |  | ||||||
|  |         If extended is true, returns a list of lists containing | ||||||
|  |         extended info: [ path, layout, extent_min, extent_max, | ||||||
|  |         total_rows, total_seconds ].  More data may be added. | ||||||
|  |         """ | ||||||
|  |         return self.db.stream_list(path, layout, bool(extended)) | ||||||
|  |  | ||||||
|  |     # /stream/create?path=/newton/prep&layout=float32_8 | ||||||
|  |     @cherrypy.expose | ||||||
|  |     @cherrypy.tools.json_in() | ||||||
|  |     @cherrypy.tools.json_out() | ||||||
|  |     @exception_to_httperror(NilmDBError, ValueError) | ||||||
|  |     @cherrypy.tools.CORS_allow(methods=["POST"]) | ||||||
|  |     def create(self, path, layout): | ||||||
|  |         """Create a new stream in the database.  Provide path | ||||||
|  |         and one of the nilmdb.layout.layouts keys. | ||||||
|  |         """ | ||||||
|  |         return self.db.stream_create(path, layout) | ||||||
|  |  | ||||||
|  |     # /stream/destroy?path=/newton/prep | ||||||
|  |     @cherrypy.expose | ||||||
|  |     @cherrypy.tools.json_in() | ||||||
|  |     @cherrypy.tools.json_out() | ||||||
|  |     @exception_to_httperror(NilmDBError) | ||||||
|  |     @cherrypy.tools.CORS_allow(methods=["POST"]) | ||||||
|  |     def destroy(self, path): | ||||||
|  |         """Delete a stream.  Fails if any data is still present.""" | ||||||
|  |         return self.db.stream_destroy(path) | ||||||
|  |  | ||||||
|  |     # /stream/rename?oldpath=/newton/prep&newpath=/newton/prep/1 | ||||||
|  |     @cherrypy.expose | ||||||
|  |     @cherrypy.tools.json_in() | ||||||
|  |     @cherrypy.tools.json_out() | ||||||
|  |     @exception_to_httperror(NilmDBError, ValueError) | ||||||
|  |     @cherrypy.tools.CORS_allow(methods=["POST"]) | ||||||
|  |     def rename(self, oldpath, newpath): | ||||||
|  |         """Rename a stream.""" | ||||||
|  |         return self.db.stream_rename(oldpath, newpath) | ||||||
|  |  | ||||||
|  |     # /stream/get_metadata?path=/newton/prep | ||||||
|  |     # /stream/get_metadata?path=/newton/prep&key=foo&key=bar | ||||||
|  |     @cherrypy.expose | ||||||
|  |     @cherrypy.tools.json_out() | ||||||
|  |     def get_metadata(self, path, key=None): | ||||||
|  |         """Get metadata for the named stream.  If optional | ||||||
|  |         key parameters are specified, only return metadata | ||||||
|  |         matching the given keys.""" | ||||||
|  |         try: | ||||||
|  |             data = self.db.stream_get_metadata(path) | ||||||
|  |         except nilmdb.server.nilmdb.StreamError as e: | ||||||
|  |             raise cherrypy.HTTPError("404 Not Found", str(e)) | ||||||
|  |         if key is None:  # If no keys specified, return them all | ||||||
|  |             key = list(data.keys()) | ||||||
|  |         elif not isinstance(key, list): | ||||||
|  |             key = [key] | ||||||
|  |         result = {} | ||||||
|  |         for k in key: | ||||||
|  |             if k in data: | ||||||
|  |                 result[k] = data[k] | ||||||
|  |             else:  # Return "None" for keys with no matching value | ||||||
|  |                 result[k] = None | ||||||
|  |         return result | ||||||
|  |  | ||||||
|  |     # Helper for set_metadata and get_metadata | ||||||
|  |     def _metadata_helper(self, function, path, data): | ||||||
|  |         if not isinstance(data, dict): | ||||||
|  |             try: | ||||||
|  |                 data = dict(json.loads(data)) | ||||||
|  |             except TypeError as e: | ||||||
|  |                 raise NilmDBError("can't parse 'data' parameter: " + str(e)) | ||||||
|  |         for key in data: | ||||||
|  |             if not isinstance(data[key], (str, float, int)): | ||||||
|  |                 raise NilmDBError("metadata values must be a string or number") | ||||||
|  |         function(path, data) | ||||||
|  |  | ||||||
|  |     # /stream/set_metadata?path=/newton/prep&data=<json> | ||||||
|  |     @cherrypy.expose | ||||||
|  |     @cherrypy.tools.json_in() | ||||||
|  |     @cherrypy.tools.json_out() | ||||||
|  |     @exception_to_httperror(NilmDBError, LookupError) | ||||||
|  |     @cherrypy.tools.CORS_allow(methods=["POST"]) | ||||||
|  |     def set_metadata(self, path, data): | ||||||
|  |         """Set metadata for the named stream, replacing any existing | ||||||
|  |         metadata.  Data can be json-encoded or a plain dictionary.""" | ||||||
|  |         self._metadata_helper(self.db.stream_set_metadata, path, data) | ||||||
|  |  | ||||||
|  |     # /stream/update_metadata?path=/newton/prep&data=<json> | ||||||
|  |     @cherrypy.expose | ||||||
|  |     @cherrypy.tools.json_in() | ||||||
|  |     @cherrypy.tools.json_out() | ||||||
|  |     @exception_to_httperror(NilmDBError, LookupError, ValueError) | ||||||
|  |     @cherrypy.tools.CORS_allow(methods=["POST"]) | ||||||
|  |     def update_metadata(self, path, data): | ||||||
|  |         """Update metadata for the named stream with the given keys | ||||||
|  |         and values.  Data can be json-encoded or a plain dictionary.""" | ||||||
|  |         self._metadata_helper(self.db.stream_update_metadata, path, data) | ||||||
|  |  | ||||||
|  |     # /stream/insert?path=/newton/prep | ||||||
|  |     @cherrypy.expose | ||||||
|  |     @cherrypy.tools.json_out() | ||||||
|  |     @exception_to_httperror(NilmDBError, ValueError) | ||||||
|  |     @cherrypy.tools.CORS_allow(methods=["PUT"]) | ||||||
|  |     def insert(self, path, start, end, binary=False): | ||||||
|  |         """ | ||||||
|  |         Insert new data into the database.  Provide textual data | ||||||
|  |         (matching the path's layout) as a HTTP PUT. | ||||||
|  |  | ||||||
|  |         If 'binary' is True, expect raw binary data, rather than lines | ||||||
|  |         of ASCII-formatted data.  Raw binary data is always | ||||||
|  |         little-endian and matches the database types (including an | ||||||
|  |         int64 timestamp). | ||||||
|  |         """ | ||||||
|  |         binary = bool_param(binary) | ||||||
|  |  | ||||||
|  |         # Important that we always read the input before throwing any | ||||||
|  |         # errors, to keep lengths happy for persistent connections. | ||||||
|  |         # Note that CherryPy 3.2.2 has a bug where this fails for GET | ||||||
|  |         # requests, if we ever want to handle those (issue #1134) | ||||||
|  |         body = cherrypy.request.body.read() | ||||||
|  |  | ||||||
|  |         # Verify content type for binary data | ||||||
|  |         content_type = cherrypy.request.headers.get('content-type') | ||||||
|  |         if binary and content_type: | ||||||
|  |             if content_type != "application/octet-stream": | ||||||
|  |                 raise cherrypy.HTTPError("400", "Content type must be " | ||||||
|  |                                          "application/octet-stream for " | ||||||
|  |                                          "binary data, not " + content_type) | ||||||
|  |  | ||||||
|  |         # Note that non-binary data is *not* decoded from bytes to string, | ||||||
|  |         # but rather passed directly to stream_insert. | ||||||
|  |  | ||||||
|  |         # Check path and get layout | ||||||
|  |         if len(self.db.stream_list(path=path)) != 1: | ||||||
|  |             raise cherrypy.HTTPError("404", "No such stream: " + path) | ||||||
|  |  | ||||||
|  |         # Check limits | ||||||
|  |         (start, end) = self._get_times(start, end) | ||||||
|  |  | ||||||
|  |         # Pass the data directly to nilmdb, which will parse it and | ||||||
|  |         # raise a ValueError if there are any problems. | ||||||
|  |         self.db.stream_insert(path, start, end, body, binary) | ||||||
|  |  | ||||||
|  |         # Done | ||||||
|  |         return | ||||||
|  |  | ||||||
|  |     # /stream/remove?path=/newton/prep | ||||||
|  |     # /stream/remove?path=/newton/prep&start=1234567890.0&end=1234567899.0 | ||||||
|  |     @cherrypy.expose | ||||||
|  |     @cherrypy.tools.json_in() | ||||||
|  |     @cherrypy.tools.CORS_allow(methods=["POST"]) | ||||||
|  |     @chunked_response | ||||||
|  |     @response_type("application/x-json-stream") | ||||||
|  |     def remove(self, path, start=None, end=None): | ||||||
|  |         """ | ||||||
|  |         Remove data from the backend database.  Removes all data in | ||||||
|  |         the interval [start, end). | ||||||
|  |  | ||||||
|  |         Returns the number of data points removed.  Since this is a potentially | ||||||
|  |         long-running operation, multiple numbers may be returned as the | ||||||
|  |         data gets removed from the backend database.  The total number of | ||||||
|  |         points removed is the sum of all of these numbers. | ||||||
|  |         """ | ||||||
|  |         (start, end) = self._get_times(start, end) | ||||||
|  |  | ||||||
|  |         if len(self.db.stream_list(path=path)) != 1: | ||||||
|  |             raise cherrypy.HTTPError("404", "No such stream: " + path) | ||||||
|  |  | ||||||
|  |         def content(start, end): | ||||||
|  |             # Note: disable chunked responses to see tracebacks from here. | ||||||
|  |             while True: | ||||||
|  |                 (removed, restart) = self.db.stream_remove(path, start, end) | ||||||
|  |                 response = json.dumps(removed) + "\r\n" | ||||||
|  |                 yield response.encode('utf-8') | ||||||
|  |                 if restart is None: | ||||||
|  |                     break | ||||||
|  |                 start = restart | ||||||
|  |         return content(start, end) | ||||||
|  |  | ||||||
|  |     # /stream/intervals?path=/newton/prep | ||||||
|  |     # /stream/intervals?path=/newton/prep&start=1234567890.0&end=1234567899.0 | ||||||
|  |     # /stream/intervals?path=/newton/prep&diffpath=/newton/prep2 | ||||||
|  |     @cherrypy.expose | ||||||
|  |     @chunked_response | ||||||
|  |     @response_type("application/x-json-stream") | ||||||
|  |     def intervals(self, path, start=None, end=None, diffpath=None): | ||||||
|  |         """ | ||||||
|  |         Get intervals from backend database.  Streams the resulting | ||||||
|  |         intervals as JSON strings separated by CR LF pairs.  This may | ||||||
|  |         make multiple requests to the nilmdb backend to avoid causing | ||||||
|  |         it to block for too long. | ||||||
|  |  | ||||||
|  |         Returns intervals between 'start' and 'end' belonging to | ||||||
|  |         'path'.  If 'diffpath' is provided, the set-difference between | ||||||
|  |         intervals in 'path' and intervals in 'diffpath' are | ||||||
|  |         returned instead. | ||||||
|  |  | ||||||
|  |         Note that the response type is the non-standard | ||||||
|  |         'application/x-json-stream' for lack of a better option. | ||||||
|  |         """ | ||||||
|  |         (start, end) = self._get_times(start, end) | ||||||
|  |  | ||||||
|  |         if len(self.db.stream_list(path=path)) != 1: | ||||||
|  |             raise cherrypy.HTTPError("404", "No such stream: " + path) | ||||||
|  |  | ||||||
|  |         if diffpath and len(self.db.stream_list(path=diffpath)) != 1: | ||||||
|  |             raise cherrypy.HTTPError("404", "No such stream: " + diffpath) | ||||||
|  |  | ||||||
|  |         def content(start, end): | ||||||
|  |             # Note: disable chunked responses to see tracebacks from here. | ||||||
|  |             while True: | ||||||
|  |                 (ints, restart) = self.db.stream_intervals(path, start, end, | ||||||
|  |                                                            diffpath) | ||||||
|  |                 response = ''.join([json.dumps(i) + "\r\n" for i in ints]) | ||||||
|  |                 yield response.encode('utf-8') | ||||||
|  |                 if restart is None: | ||||||
|  |                     break | ||||||
|  |                 start = restart | ||||||
|  |         return content(start, end) | ||||||
|  |  | ||||||
|  |     # /stream/extract?path=/newton/prep&start=1234567890.0&end=1234567899.0 | ||||||
|  |     @cherrypy.expose | ||||||
|  |     @chunked_response | ||||||
|  |     def extract(self, path, start=None, end=None, | ||||||
|  |                 count=False, markup=False, binary=False): | ||||||
|  |         """ | ||||||
|  |         Extract data from backend database.  Streams the resulting | ||||||
|  |         entries as ASCII text lines separated by newlines.  This may | ||||||
|  |         make multiple requests to the nilmdb backend to avoid causing | ||||||
|  |         it to block for too long. | ||||||
|  |  | ||||||
|  |         If 'count' is True, returns a count rather than actual data. | ||||||
|  |  | ||||||
|  |         If 'markup' is True, adds comments to the stream denoting each | ||||||
|  |         interval's start and end timestamp. | ||||||
|  |  | ||||||
|  |         If 'binary' is True, return raw binary data, rather than lines | ||||||
|  |         of ASCII-formatted data.  Raw binary data is always | ||||||
|  |         little-endian and matches the database types (including an | ||||||
|  |         int64 timestamp). | ||||||
|  |         """ | ||||||
|  |         binary = bool_param(binary) | ||||||
|  |         markup = bool_param(markup) | ||||||
|  |         count = bool_param(count) | ||||||
|  |  | ||||||
|  |         (start, end) = self._get_times(start, end) | ||||||
|  |  | ||||||
|  |         # Check path and get layout | ||||||
|  |         if len(self.db.stream_list(path=path)) != 1: | ||||||
|  |             raise cherrypy.HTTPError("404", "No such stream: " + path) | ||||||
|  |  | ||||||
|  |         if binary: | ||||||
|  |             content_type = "application/octet-stream" | ||||||
|  |             if markup or count: | ||||||
|  |                 raise cherrypy.HTTPError("400", "can't mix binary and " | ||||||
|  |                                          "markup or count modes") | ||||||
|  |         else: | ||||||
|  |             content_type = "text/plain" | ||||||
|  |         cherrypy.response.headers['Content-Type'] = content_type | ||||||
|  |  | ||||||
|  |         def content(start, end): | ||||||
|  |             # Note: disable chunked responses to see tracebacks from here. | ||||||
|  |             if count: | ||||||
|  |                 matched = self.db.stream_extract(path, start, end, | ||||||
|  |                                                  count=True) | ||||||
|  |                 yield sprintf(b"%d\n", matched) | ||||||
|  |                 return | ||||||
|  |  | ||||||
|  |             while True: | ||||||
|  |                 (data, restart) = self.db.stream_extract( | ||||||
|  |                     path, start, end, count=False, | ||||||
|  |                     markup=markup, binary=binary) | ||||||
|  |                 yield data | ||||||
|  |  | ||||||
|  |                 if restart is None: | ||||||
|  |                     return | ||||||
|  |                 start = restart | ||||||
|  |         return content(start, end) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class Exiter(): | ||||||
|  |     """App that exits the server, for testing""" | ||||||
|  |     @cherrypy.expose | ||||||
|  |     def index(self): | ||||||
|  |         cherrypy.response.headers['Content-Type'] = 'text/plain' | ||||||
|  |  | ||||||
|  |         def content(): | ||||||
|  |             yield b'Exiting by request' | ||||||
|  |             raise SystemExit | ||||||
|  |  | ||||||
|  |         return content() | ||||||
|  |     index._cp_config = {'response.stream': True} | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class Server(): | ||||||
|  |     def __init__(self, db, host='127.0.0.1', port=8080, | ||||||
|  |                  stoppable=False,        # whether /exit URL exists | ||||||
|  |                  fast_shutdown=False,    # don't wait for clients to disconn. | ||||||
|  |                  force_traceback=False,  # include traceback in all errors | ||||||
|  |                  basepath='',            # base URL path for cherrypy.tree | ||||||
|  |                  ): | ||||||
|  |         # Save server version, just for verification during tests | ||||||
|  |         self.version = nilmdb.__version__ | ||||||
|  |  | ||||||
|  |         self.db = db | ||||||
|  |         if not getattr(db, "_thread_safe", None): | ||||||
|  |             raise KeyError("Database object " + str(db) + " doesn't claim " | ||||||
|  |                            "to be thread safe.  You should pass " | ||||||
|  |                            "nilmdb.utils.serializer_proxy(NilmDB)(args) " | ||||||
|  |                            "rather than NilmDB(args).") | ||||||
|  |  | ||||||
|  |         # Build up global server configuration | ||||||
|  |         cherrypy.config.update({ | ||||||
|  |             'environment': 'embedded', | ||||||
|  |             'server.socket_host': host, | ||||||
|  |             'server.socket_port': port, | ||||||
|  |             'engine.autoreload.on': False, | ||||||
|  |             'server.max_request_body_size': 8*1024*1024, | ||||||
|  |             }) | ||||||
|  |  | ||||||
|  |         # Build up application specific configuration | ||||||
|  |         app_config = {} | ||||||
|  |         app_config.update({ | ||||||
|  |             'error_page.default': self.json_error_page, | ||||||
|  |             }) | ||||||
|  |  | ||||||
|  |         # Some default headers to just help identify that things are working | ||||||
|  |         app_config.update({'response.headers.X-Jim-Is-Awesome': 'yeah'}) | ||||||
|  |  | ||||||
|  |         # Set up Cross-Origin Resource Sharing (CORS) handler so we | ||||||
|  |         # can correctly respond to browsers' CORS preflight requests. | ||||||
|  |         # This also limits verbs to GET and HEAD by default. | ||||||
|  |         app_config.update({'tools.CORS_allow.on': True, | ||||||
|  |                            'tools.CORS_allow.methods': ['GET', 'HEAD']}) | ||||||
|  |  | ||||||
|  |         # Configure the 'json_in' tool to also allow other content-types | ||||||
|  |         # (like x-www-form-urlencoded), and to treat JSON as a dict that | ||||||
|  |         # fills request.params. | ||||||
|  |         app_config.update({'tools.json_in.force': False, | ||||||
|  |                            'tools.json_in.processor': json_to_request_params}) | ||||||
|  |  | ||||||
|  |         # Send tracebacks in error responses.  They're hidden by the | ||||||
|  |         # error_page function for client errors (code 400-499). | ||||||
|  |         app_config.update({'request.show_tracebacks': True}) | ||||||
|  |         self.force_traceback = force_traceback | ||||||
|  |  | ||||||
|  |         # Patch CherryPy error handler to never pad out error messages. | ||||||
|  |         # This isn't necessary, but then again, neither is padding the | ||||||
|  |         # error messages. | ||||||
|  |         cherrypy._cperror._ie_friendly_error_sizes = {} | ||||||
|  |  | ||||||
|  |         # Build up the application and mount it | ||||||
|  |         root = Root(self.db) | ||||||
|  |         root.stream = Stream(self.db) | ||||||
|  |         if stoppable: | ||||||
|  |             root.exit = Exiter() | ||||||
|  |         cherrypy.tree.apps = {} | ||||||
|  |         cherrypy.tree.mount(root, basepath, config={"/": app_config}) | ||||||
|  |  | ||||||
|  |         # Shutdowns normally wait for clients to disconnect.  To speed | ||||||
|  |         # up tests, set fast_shutdown = True | ||||||
|  |         if fast_shutdown: | ||||||
|  |             cherrypy.server.shutdown_timeout = 0 | ||||||
|  |         else: | ||||||
|  |             cherrypy.server.shutdown_timeout = 5 | ||||||
|  |  | ||||||
|  |         # Set up the WSGI application pointer for external programs | ||||||
|  |         self.wsgi_application = cherrypy.tree | ||||||
|  |  | ||||||
|  |     def json_error_page(self, status, message, traceback, version): | ||||||
|  |         """Return a custom error page in JSON so the client can parse it""" | ||||||
|  |         return json_error_page(status, message, traceback, version, | ||||||
|  |                                self.force_traceback) | ||||||
|  |  | ||||||
|  |     def start(self, blocking=False, event=None): | ||||||
|  |         cherrypy_start(blocking, event) | ||||||
|  |  | ||||||
|  |     def stop(self): | ||||||
|  |         cherrypy_stop() | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # Use a single global nilmdb.server.NilmDB and nilmdb.server.Server | ||||||
|  | # instance since the database can only be opened once.  For this to | ||||||
|  | # work, the web server must use only a single process and single | ||||||
|  | # Python interpreter.  Multiple threads are OK. | ||||||
|  | _wsgi_server = None | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def wsgi_application(dbpath, basepath): | ||||||
|  |     """Return a WSGI application object with a database at the | ||||||
|  |     specified path. | ||||||
|  |  | ||||||
|  |     'dbpath' is a filesystem location, e.g. /home/nilm/db | ||||||
|  |  | ||||||
|  |     'basepath' is the URL path of the application base, which | ||||||
|  |     is the same as the first argument to Apache's WSGIScriptAlias | ||||||
|  |     directive. | ||||||
|  |     """ | ||||||
|  |     def application(environ, start_response): | ||||||
|  |         global _wsgi_server | ||||||
|  |         if _wsgi_server is None: | ||||||
|  |             # Try to start the server | ||||||
|  |             try: | ||||||
|  |                 db = nilmdb.utils.serializer_proxy( | ||||||
|  |                     nilmdb.server.NilmDB)(dbpath) | ||||||
|  |                 _wsgi_server = nilmdb.server.Server( | ||||||
|  |                     db, basepath=basepath.rstrip('/')) | ||||||
|  |             except Exception: | ||||||
|  |                 # Build an error message on failure | ||||||
|  |                 import pprint | ||||||
|  |                 err = sprintf("Initializing database at path '%s' failed:\n\n", | ||||||
|  |                               dbpath) | ||||||
|  |                 err += traceback.format_exc() | ||||||
|  |                 import pwd | ||||||
|  |                 import grp | ||||||
|  |                 err += sprintf("\nRunning as: uid=%d (%s), gid=%d (%s) " | ||||||
|  |                                "on host %s, pid %d\n", | ||||||
|  |                                os.getuid(), pwd.getpwuid(os.getuid())[0], | ||||||
|  |                                os.getgid(), grp.getgrgid(os.getgid())[0], | ||||||
|  |                                socket.gethostname(), os.getpid()) | ||||||
|  |                 err += sprintf("\nEnvironment:\n%s\n", pprint.pformat(environ)) | ||||||
|  |         if _wsgi_server is None: | ||||||
|  |             # Serve up the error with our own mini WSGI app. | ||||||
|  |             err_b = err.encode('utf-8') | ||||||
|  |             headers = [('Content-type', 'text/plain; charset=utf-8'), | ||||||
|  |                        ('Content-length', str(len(err_b)))] | ||||||
|  |             start_response("500 Internal Server Error", headers) | ||||||
|  |             return [err_b] | ||||||
|  |  | ||||||
|  |         # Call the normal application | ||||||
|  |         return _wsgi_server.wsgi_application(environ, start_response) | ||||||
|  |     return application | ||||||
							
								
								
									
										225
									
								
								nilmdb/server/serverutil.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										225
									
								
								nilmdb/server/serverutil.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,225 @@ | |||||||
|  | """Miscellaneous decorators and other helpers for running a CherryPy | ||||||
|  | server""" | ||||||
|  |  | ||||||
|  | import os | ||||||
|  | import sys | ||||||
|  | import json | ||||||
|  | import decorator | ||||||
|  | import functools | ||||||
|  | import threading | ||||||
|  |  | ||||||
|  | import cherrypy | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # Helper to parse parameters into booleans | ||||||
|  | def bool_param(s): | ||||||
|  |     """Return a bool indicating whether parameter 's' was True or False, | ||||||
|  |     supporting a few different types for 's'.""" | ||||||
|  |     try: | ||||||
|  |         ss = s.lower() | ||||||
|  |         if ss in ["0", "false", "f", "no", "n"]: | ||||||
|  |             return False | ||||||
|  |         if ss in ["1", "true", "t", "yes", "y"]: | ||||||
|  |             return True | ||||||
|  |     except Exception: | ||||||
|  |         return bool(s) | ||||||
|  |     raise cherrypy.HTTPError("400 Bad Request", | ||||||
|  |                              "can't parse parameter: " + ss) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # Decorators | ||||||
|  | def chunked_response(func): | ||||||
|  |     """Decorator to enable chunked responses.""" | ||||||
|  |     # Set this to False to get better tracebacks from some requests | ||||||
|  |     # (/stream/extract, /stream/intervals). | ||||||
|  |     func._cp_config = {'response.stream': True} | ||||||
|  |     return func | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def response_type(content_type): | ||||||
|  |     """Return a decorator-generating function that sets the | ||||||
|  |     response type to the specified string.""" | ||||||
|  |     def wrapper(func, *args, **kwargs): | ||||||
|  |         cherrypy.response.headers['Content-Type'] = content_type | ||||||
|  |         return func(*args, **kwargs) | ||||||
|  |     return decorator.decorator(wrapper) | ||||||
|  |  | ||||||
|  |  | ||||||
def exception_to_httperror(*expected):
    """Build a decorator that turns the listed exception types into
    a "400 Bad Request" HTTPError carrying the original message.

        @exception_to_httperror(NilmDBError, ValueError)
        def foo():
            pass

    Exceptions of any other type propagate untouched.
    """
    def wrapper(func, *args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except expected as err:
            # Raise the HTTP error in its place, but keep pointing at
            # the traceback of the original failure.
            replacement = cherrypy.HTTPError("400 Bad Request", str(err))
            raise replacement.with_traceback(err.__traceback__)
        return result
    # decorator.decorator keeps the wrapped function's argspec intact,
    # which CherryPy relies on to report argument errors correctly.
    return decorator.decorator(wrapper)
|  |  | ||||||
|  |  | ||||||
|  | # Custom CherryPy tools | ||||||
def CORS_allow(methods):
    """This does several things:

    Handles CORS preflight requests.
    Adds Allow: header to all requests.
    Raises 405 if request.method is not in methods.

    It is similar to cherrypy.tools.allow, with the CORS stuff added.

    'methods' is a single method name or a list/tuple of names; they
    are upper-cased, an empty list becomes GET and HEAD, and HEAD is
    added automatically whenever GET is allowed.

    Add this to CherryPy with:
    cherrypy.tools.CORS_allow = cherrypy.Tool('on_start_resource', CORS_allow)
    """
    request = cherrypy.request.headers
    response = cherrypy.response.headers

    if not isinstance(methods, (tuple, list)):
        methods = [methods]
    methods = [m.upper() for m in methods if m]
    if not methods:
        methods = ['GET', 'HEAD']
    elif 'GET' in methods and 'HEAD' not in methods:
        methods.append('HEAD')
    allowed = ', '.join(methods)
    response['Allow'] = allowed

    # Allow all origins
    if 'Origin' in request:
        response['Access-Control-Allow-Origin'] = request['Origin']

    # If it's a CORS preflight request, send the response ourselves.
    # Access-Control-Request-Method is what identifies a preflight;
    # Access-Control-Request-Headers is optional (it is only sent when
    # the real request carries non-safelisted headers), so its absence
    # must not push the preflight into the 405 path below.
    request_method = request.get("Access-Control-Request-Method", None)
    request_headers = request.get("Access-Control-Request-Headers", None)
    if cherrypy.request.method == "OPTIONS" and request_method:
        if request_headers:
            response['Access-Control-Allow-Headers'] = request_headers
        response['Access-Control-Allow-Methods'] = allowed
        # Try to stop further processing and return a 200 OK
        cherrypy.response.status = "200 OK"
        cherrypy.response.body = b""
        cherrypy.request.handler = lambda: ""
        return

    # Reject methods that were not explicitly allowed
    if cherrypy.request.method not in methods:
        raise cherrypy.HTTPError(405)
|  |  | ||||||
|  |  | ||||||
|  | # Helper for json_in tool to process JSON data into normal request | ||||||
|  | # parameters. | ||||||
def json_to_request_params(body):
    """Parse a JSON request body and merge it into the request params.

    Runs CherryPy's json_processor on 'body', then copies the decoded
    object into cherrypy.request.params.  Raises HTTPError 415 when the
    decoded JSON is not a dict.
    """
    cherrypy.lib.jsontools.json_processor(body)
    decoded = cherrypy.request.json
    if isinstance(decoded, dict):
        cherrypy.request.params.update(decoded)
        return
    raise cherrypy.HTTPError(415)
|  |  | ||||||
|  |  | ||||||
|  | # Used as an "error_page.default" handler | ||||||
def json_error_page(status, message, traceback, version,
                    force_traceback=False):
    """Return a custom error page in JSON so the client can parse it.

    'status', 'message', 'traceback' and 'version' are the strings
    handed to an "error_page.default" handler.  The traceback is
    blanked for 4xx statuses (client's fault) unless 'force_traceback'
    is true.  Also switches the response Content-Type to JSON.

    Returns the compact JSON encoding of the four fields.
    """
    errordata = {"status": status,
                 "message": message,
                 "version": version,
                 "traceback": traceback}
    # Don't send a traceback if the error was 400-499 (client's fault)
    code = int(status.split()[0])
    if not force_traceback and 400 <= code <= 499:
        errordata["traceback"] = ""
    # Override the response type, which was previously set to text/html
    cherrypy.serving.response.headers['Content-Type'] = (
        "application/json;charset=utf-8")
    # Undo the HTML escaping that cherrypy's get_error_page function applies
    # (cherrypy issue 1135).  "&amp;" must be handled last so that an
    # escaped "&lt;" in the original text ("&amp;lt;") comes back as
    # "&lt;" rather than "<".
    for k, v in errordata.items():
        v = v.replace("&lt;", "<")
        v = v.replace("&gt;", ">")
        v = v.replace("&amp;", "&")
        errordata[k] = v
    return json.dumps(errordata, separators=(',', ':'))
|  |  | ||||||
|  |  | ||||||
class CherryPyExit(SystemExit):
    """SystemExit subclass raised by cherrypy_patch_exit's replacement
    for os._exit."""
|  |  | ||||||
|  |  | ||||||
def cherrypy_patch_exit():
    """Make the CherryPy bus raise CherryPyExit instead of calling
    os._exit, and silence thread tracebacks for that exception.

    The patch is applied at most once per bus; later calls return
    immediately.
    """
    # Cherrypy stupidly calls os._exit(70) when it can't bind the port
    # and exits.  Instead of that, raise a CherryPyExit (derived from
    # SystemExit).  This exception may not make it back up to the caller
    # due to internal thread use in the CherryPy engine, but there should
    # be at least some indication that it happened.
    bus = cherrypy.process.wspbus.bus
    if "_patched_exit" in vars(bus):
        return
    bus._patched_exit = True

    def raise_instead_of_exit(code):
        raise CherryPyExit(code)

    def exit_with_os_exit_swapped(original_bus_exit):
        # Swap os._exit only for the duration of the real bus.exit call
        saved_os_exit = os._exit
        os._exit = raise_instead_of_exit
        try:
            original_bus_exit()
        finally:
            os._exit = saved_os_exit
    bus.exit = functools.partial(exit_with_os_exit_swapped, bus.exit)

    # A behavior change in Python 3.8 means that some thread exceptions,
    # derived from SystemExit, now print tracebacks where they didn't
    # used to: https://bugs.python.org/issue1230540
    # Install a thread exception hook that ignores CherryPyExit;
    # to make this match the behavior where we didn't set
    # threading.excepthook, we also need to ignore SystemExit.
    def quiet_exit_hook(args):
        if args.exc_type in (CherryPyExit, SystemExit):
            return
        sys.excepthook(args.exc_type, args.exc_value,
                       args.exc_traceback)  # pragma: no cover
    threading.excepthook = quiet_exit_hook
|  |  | ||||||
|  |  | ||||||
|  | # Start/stop CherryPy standalone server | ||||||
def cherrypy_start(blocking=False, event=False):
    """Start the CherryPy server, handling errors and signals
    somewhat gracefully.

    blocking: if true, wait here until the engine reaches the EXITING
        state, shutting the engine down on KeyboardInterrupt, IOError
        or SystemExit (SystemExit is re-raised afterwards).
    event: optional object with a set() method that is called once the
        engine has started.  Both None and the default False mean
        "no event".
    """

    cherrypy_patch_exit()

    # Start the server
    cherrypy.engine.start()

    # Signal that the engine has started successfully.  The default
    # value is False rather than None, so both have to be skipped;
    # calling False.set() would raise AttributeError.
    if event is not None and event is not False:
        event.set()

    if blocking:
        try:
            cherrypy.engine.wait(cherrypy.engine.states.EXITING,
                                 interval=0.1, channel='main')
        except (KeyboardInterrupt, IOError):
            cherrypy.engine.log('Keyboard Interrupt: shutting down')
            cherrypy.engine.exit()
        except SystemExit:
            cherrypy.engine.log('SystemExit raised: shutting down')
            cherrypy.engine.exit()
            raise
|  |  | ||||||
|  |  | ||||||
|  | # Stop CherryPy server | ||||||
def cherrypy_stop():
    """Shut down the CherryPy engine."""
    engine = cherrypy.engine
    engine.exit()
| @@ -1,46 +0,0 @@ | |||||||
| from nilmdb import Interval, IntervalSet, IntervalError, FileInterval |  | ||||||
| from datetime import datetime |  | ||||||
| from nose.tools import assert_raises |  | ||||||
|  |  | ||||||
| from test_interval import iset |  | ||||||
|  |  | ||||||
def fiset(string):
    """Same string notation as iset, but every interval is a
    FileInterval backed by "test.dat" """
    result = IntervalSet()
    for offset, char in enumerate(string):
        when = datetime.strptime("{0:04d}".format(offset + 2000), "%Y")
        if char == "[":
            begin = when
            continue
        if char not in ("|", "]"):
            continue
        # Both "|" and "]" close the interval that is currently open
        result += FileInterval(begin, when, "test.dat")
        if char == "|":
            begin = when
        else:
            del begin
    return result
|  |  | ||||||
def test_fileinterval_vs_interval():
    """Check how FileInterval and Interval relate through inheritance"""

    plain = iset("[--]")
    filed = fiset("[--]")

    # check types
    first_plain, first_filed = plain[0], filed[0]
    assert(isinstance(first_plain, Interval))
    assert(not isinstance(first_plain, FileInterval))
    assert(isinstance(first_filed, Interval))
    assert(isinstance(first_filed, FileInterval))

    # an intersection is built out of subsets of its first operand
    from_plain = (plain & filed)
    assert(isinstance(from_plain[0], Interval))
    assert(not isinstance(from_plain[0], FileInterval))
    from_filed = (filed & plain)
    assert(isinstance(from_filed[0], Interval))
    assert(isinstance(from_filed[0], FileInterval))

    # they're still the same though
    assert(plain == filed == from_filed)

    # just for coverage
    assert_raises(IntervalError, fiset("[]")[0].subset,
                  filed[0].start, filed[0].end)
| @@ -1,189 +0,0 @@ | |||||||
| from nilmdb import Interval, IntervalSet, IntervalError |  | ||||||
| from datetime import datetime |  | ||||||
| from nose.tools import assert_raises |  | ||||||
| import itertools  |  | ||||||
|  |  | ||||||
def test_interval():
    """Exercise construction, assignment, ordering and helpers of Interval"""
    early = datetime.strptime("19801205", "%Y%m%d")
    middle = datetime.strptime("19900216", "%Y%m%d")
    late = datetime.strptime("20111205", "%Y%m%d")

    # basic construction
    ival = Interval(early, early)
    ival = Interval(early, late)
    assert(ival.start == early)
    assert(ival.end == late)

    # assignment should work, except when it would put end before start
    ival.start = middle
    try:
        ival.end = early
    except IntervalError:
        pass
    else:
        raise Exception("should have died there")
    ival.start = early
    ival.end = middle

    # end before start
    assert_raises(IntervalError, Interval, late, early)

    # wrong type
    assert_raises(IntervalError, Interval, 1, 2)

    # compare
    assert(Interval(early, middle) == Interval(early, middle))
    assert(Interval(late, late) == Interval(late, late))
    ascending = [((early, middle), (early, late)),
                 ((early, middle), (middle, late)),
                 ((early, late), (middle, late))]
    for lower, higher in ascending:
        assert(Interval(*lower) < Interval(*higher))
    assert(Interval(early, late) > Interval(early, middle))
    assert(Interval(middle, middle) > Interval(early, late))
    assert_raises(TypeError, cmp, ival, 123)

    # subset
    whole = Interval(early, late)
    assert(whole.subset(early, middle) == Interval(early, middle))
    assert_raises(IntervalError, Interval(middle, late).subset, early, middle)

    # append
    front = Interval(early, middle)
    back = Interval(middle, late)
    assert(front.is_adjacent(back))
    assert(back.is_adjacent(front))
    assert(not back.is_adjacent(Interval(early, late)))
    assert_raises(TypeError, Interval(early, middle).is_adjacent, 1)

    # misc
    shown = repr(ival)
    rebuilt = eval(shown.replace("datetime.", ""))
    assert(shown == repr(rebuilt))
    assert(str(ival) == "[1980-12-05 00:00:00 -> 1990-02-16 00:00:00]")
|  |  | ||||||
def test_interval_intersect():
    """Check Interval.intersects over every ordered pair of endpoints"""
    years = ["00", "01", "02", "03"]
    dates = [datetime.strptime(year, "%y") for year in years]
    endpoint_pairs = list(itertools.permutations(dates, 2))
    combos = list(itertools.product(endpoint_pairs, endpoint_pairs))
    # Indexes into 'combos', grouped by the expected intersects() result.
    # Indexes in neither list must fail to construct.
    disjoint = [4, 5, 8, 20, 48, 56, 60, 96, 97, 100]
    overlapping = [0, 1, 2, 12, 13, 14, 16, 17, 24, 25, 26, 28, 29,
                   32, 49, 50, 52, 53, 61, 62, 64, 65, 68, 98, 101, 104]
    should_intersect = {False: disjoint, True: overlapping}
    for index, (first_ends, second_ends) in enumerate(combos):
        try:
            i1 = Interval(first_ends[0], first_ends[1])
            i2 = Interval(second_ends[0], second_ends[1])
            assert(i1.intersects(i2) == i2.intersects(i1))
            assert(index in should_intersect[i1.intersects(i2)])
        except IntervalError:
            assert(index not in overlapping and
                   index not in disjoint)
    # i1 is the last interval that was constructed inside the loop
    assert_raises(TypeError, i1.intersects, 1234)
|  |  | ||||||
def test_intervalset_construct():
    """Check IntervalSet construction, comparison and addition"""
    y0, y1, y2, y3 = [datetime.strptime(year, "%y")
                      for year in ["00", "01", "02", "03"]]

    a = Interval(y0, y1)
    b = Interval(y1, y2)
    c = Interval(y0, y2)
    d = Interval(y2, y3)

    only_a = IntervalSet(a)
    a_and_b = IntervalSet([a, b])
    listed_a = IntervalSet([a])
    assert(only_a != a_and_b)
    assert(only_a == listed_a)
    assert(only_a != 3)
    assert(IntervalSet(a) != IntervalSet(b))

    print(only_a == None)
    assert_raises(TypeError, cmp, only_a, a_and_b)
    assert_raises(IntervalError, IntervalSet, [a, b, c])
    assert_raises(TypeError, IntervalSet, [1, 2])

    copied = IntervalSet(a_and_b)   # test iterator
    assert(copied == a_and_b)
    assert(len(copied) == 2)
    assert(len(IntervalSet()) == 0)

    # Test adding: in-place and binary, with a set and with an interval
    summed = IntervalSet(a)
    summed += IntervalSet(b)
    assert(summed == IntervalSet([a, b]))
    summed = IntervalSet(a)
    summed += b
    assert(summed == IntervalSet([a, b]))
    summed = IntervalSet(a) + IntervalSet(b)
    assert(summed == IntervalSet([a, b]))
    summed = IntervalSet(b) + a
    assert(summed == IntervalSet([a, b]))

    # A set consisting of [0-1],[1-2] should match a set consisting of [0-2]
    assert(IntervalSet([a, b]) == IntervalSet([c]))
    # Etc
    unequal = [([a, d], [c]),
               ([c], [a, d]),
               ([c, d], [b, d])]
    for left, right in unequal:
        assert(IntervalSet(left) != IntervalSet(right))

    # misc
    shown = repr(summed)
    rebuilt = eval(shown.replace("datetime.", ""))
    assert(shown == repr(rebuilt))
|  |  | ||||||
def iset(string):
    """Turn a picture of intervals into an IntervalSet, for tests

    The position of each character selects a year (position 0 is 2000):
    [ = interval start
    | = interval end + adjacent start
    ] = interval end
    any other character is skipped
    """
    result = IntervalSet()
    for offset, char in enumerate(string):
        when = datetime.strptime("{0:04d}".format(offset + 2000), "%Y")
        if char == "[":
            begin = when
            continue
        if char not in ("|", "]"):
            continue
        # Both "|" and "]" close the interval that is currently open
        result += Interval(begin, when)
        if char == "|":
            begin = when
        else:
            del begin
    return result
|  |  | ||||||
def test_intervalset_iset():
    """Check that iset pictures build the expected sets"""
    # a "|" in the middle splits one interval into two adjacent ones
    whole = iset("  [----]   ")
    split = iset("  [-|--]   ")
    assert(whole == split)

    combined = (iset("[]  [--]   ") +
                iset(" []    [--]"))
    assert(combined == iset("[|] [-----]"))
|  |  | ||||||
def test_intervalset_intsersect():
    """Check the & operator of IntervalSet"""
    assert_raises(TypeError, iset("[--]").__and__, 1234)

    # (left operand, right operand, expected intersection)
    cases = [
        ("[---------]",
         " [---]     ",
         " [---]     "),
        (" [---]     ",
         "[---------]",
         " [---]     "),
        ("    [-----]",
         " [-----]   ",
         "    [--]   "),
        ("      [---]",
         " [--]      ",
         "           "),
        ("    [-|---]",
         " [-----|-] ",
         "    [----] "),
        ("    [-|-]  ",
         " [-|--|--] ",
         "    [---]  "),
        (" [----][--]",
         "[-] [--] []",
         " [] [-]  []"),
    ]
    for left, right, expected in cases:
        assert(iset(left) & iset(right) == iset(expected))
|  |  | ||||||
							
								
								
									
										16
									
								
								nilmdb/utils/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								nilmdb/utils/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,16 @@ | |||||||
|  | """NilmDB utilities""" | ||||||
|  |  | ||||||
|  |  | ||||||
|  | from nilmdb.utils.timer import Timer | ||||||
|  | from nilmdb.utils.serializer import serializer_proxy | ||||||
|  | from nilmdb.utils.lrucache import lru_cache | ||||||
|  | from nilmdb.utils.diskusage import du, human_size | ||||||
|  | from nilmdb.utils.mustclose import must_close | ||||||
|  | from nilmdb.utils import atomic | ||||||
|  | import nilmdb.utils.threadsafety | ||||||
|  | import nilmdb.utils.fallocate | ||||||
|  | import nilmdb.utils.time | ||||||
|  | import nilmdb.utils.iterator | ||||||
|  | import nilmdb.utils.interval | ||||||
|  | import nilmdb.utils.lock | ||||||
|  | import nilmdb.utils.sort | ||||||
							
								
								
									
										19
									
								
								nilmdb/utils/atomic.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								nilmdb/utils/atomic.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,19 @@ | |||||||
|  | # Atomic file writing helper. | ||||||
|  |  | ||||||
|  | import os | ||||||
|  |  | ||||||
|  |  | ||||||
def replace_file(filename, content):
    """Attempt to atomically and durably replace the filename with the
    given contents.

    filename: path of the file to replace, as bytes or str.
    content: bytes to write.

    The data is first written to a sibling file named filename + ".new",
    flushed and fsync'ed, and then moved over 'filename' with
    os.replace.
    """

    # Build the temporary name with a suffix of the same type as the
    # path, so that both bytes and str paths are accepted.
    suffix = b".new" if isinstance(filename, bytes) else ".new"
    newfilename = filename + suffix

    # Write to new file, flush it
    with open(newfilename, "wb") as f:
        f.write(content)
        f.flush()
        os.fsync(f.fileno())

    # Move new file over old one
    os.replace(newfilename, filename)
							
								
								
									
										36
									
								
								nilmdb/utils/diskusage.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								nilmdb/utils/diskusage.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,36 @@ | |||||||
|  | import os | ||||||
|  | import errno | ||||||
|  | from math import log | ||||||
|  |  | ||||||
|  |  | ||||||
def human_size(num):
    """Human friendly file size.

    num: size in bytes (non-negative).

    Returns a string such as '0 bytes', '1 byte', '10 kiB', '1.5 MiB'
    or '1.00 GiB'.  Units step by factors of 1024 and stop at TiB, so
    larger sizes are reported as more than 1024 TiB.

    Raises ValueError for negative sizes.
    """
    # (unit name, number of decimals shown for that unit)
    units = (('bytes', 0), ('kiB', 0), ('MiB', 1), ('GiB', 2), ('TiB', 2))
    if num == 0:
        return '0 bytes'
    if num == 1:
        return '1 byte'
    if num < 0:
        raise ValueError("size must not be negative: %r" % (num,))
    # Pick the unit by exact repeated division instead of a floating
    # point logarithm: dividing by 1024 is exact in binary floating
    # point, so boundaries cannot be misjudged by rounding, and tiny
    # fractional sizes can no longer produce a negative exponent that
    # wraps around to the largest unit.
    quotient = float(num)
    exponent = 0
    while quotient >= 1024 and exponent < len(units) - 1:
        quotient /= 1024
        exponent += 1
    unit, num_decimals = units[exponent]
    return '{:.{}f} {}'.format(quotient, num_decimals, unit)
|  |  | ||||||
|  |  | ||||||
def du(path):
    """Total on-disk size of 'path' in bytes, like du -sb.

    Sizes are st_blocks * 512; directories are walked recursively.
    Entries that vanish while we look at them (broken symlinks, files
    in the process of being removed) count as 0; any other OSError is
    re-raised.
    """
    try:
        total = os.stat(path).st_blocks * 512
        if os.path.isdir(path):
            for entry in os.listdir(path):
                total += du(os.path.join(path, entry))
        return total
    except OSError as err:
        if err.errno == errno.ENOENT:
            return 0
        raise
							
								
								
									
										20
									
								
								nilmdb/utils/fallocate.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								nilmdb/utils/fallocate.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,20 @@ | |||||||
|  | # Implementation of hole punching via fallocate, if the OS | ||||||
|  | # and filesystem support it. | ||||||
|  |  | ||||||
|  | import fallocate | ||||||
|  |  | ||||||
|  |  | ||||||
def punch_hole(filename, offset, length, ignore_errors=True):
    """Deallocate 'length' bytes at 'offset' in the file via fallocate,
    keeping the file size unchanged.

    Support for this is spotty, so by default any exception is
    swallowed; pass ignore_errors=False to have it re-raised.
    """
    try:
        with open(filename, "r+") as handle:
            fallocate.fallocate(
                handle.fileno(), offset, length,
                fallocate.FALLOC_FL_KEEP_SIZE |
                fallocate.FALLOC_FL_PUNCH_HOLE)
    except Exception:
        if not ignore_errors:
            raise
							
								
								
									
										168
									
								
								nilmdb/utils/interval.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										168
									
								
								nilmdb/utils/interval.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,168 @@ | |||||||
|  | """Interval.  Like nilmdb.server.interval, but re-implemented here | ||||||
|  | in plain Python so clients have easier access to it, and with a few | ||||||
|  | helper functions. | ||||||
|  |  | ||||||
|  | Intervals are half-open, ie. they include data points with timestamps | ||||||
|  | [start, end) | ||||||
|  | """ | ||||||
|  |  | ||||||
|  | import nilmdb.utils.time | ||||||
|  | import nilmdb.utils.iterator | ||||||
|  |  | ||||||
|  |  | ||||||
class IntervalError(Exception):
    """Raised for invalid intervals: bad bounds, overlap, etc."""
|  |  | ||||||
|  |  | ||||||
|  | # Interval | ||||||
class Interval:
    """Represents an interval of time.

    The interval is half-open, [start, end): 'start' is included and
    'end' is not.  Intervals compare by start first, then by end.
    """

    def __init__(self, start, end):
        """
        'start' and 'end' are arbitrary numbers that represent time.

        Raises IntervalError unless start < end.
        """
        if start >= end:
            # Explicitly disallow zero-width intervals, since they're half-open
            raise IntervalError("start %s must precede end %s" % (start, end))
        self.start = start
        self.end = end

    def __repr__(self):
        s = repr(self.start) + ", " + repr(self.end)
        return self.__class__.__name__ + "(" + s + ")"

    def __str__(self):
        return ("[" + nilmdb.utils.time.timestamp_to_string(self.start) +
                " -> " + nilmdb.utils.time.timestamp_to_string(self.end) + ")")

    def human_string(self):
        """Like str(), but with human-readable timestamps."""
        return ("[ " + nilmdb.utils.time.timestamp_to_human(self.start) +
                " -> " + nilmdb.utils.time.timestamp_to_human(self.end) + " ]")

    @staticmethod
    def _bounds(obj):
        """Return (start, end) of any interval-like object, or None if
        the object does not have both attributes."""
        try:
            return (obj.start, obj.end)
        except AttributeError:
            return None

    # Compare two intervals.  If non-equal, order by start then end.
    # Anything exposing 'start' and 'end' is accepted; for any other
    # object NotImplemented is returned, so that == and != give
    # False/True and ordering raises TypeError, instead of an
    # AttributeError leaking out of the comparison.
    def __lt__(self, other):
        theirs = self._bounds(other)
        if theirs is None:
            return NotImplemented
        return (self.start, self.end) < theirs

    def __gt__(self, other):
        theirs = self._bounds(other)
        if theirs is None:
            return NotImplemented
        return (self.start, self.end) > theirs

    def __le__(self, other):
        theirs = self._bounds(other)
        if theirs is None:
            return NotImplemented
        return (self.start, self.end) <= theirs

    def __ge__(self, other):
        theirs = self._bounds(other)
        if theirs is None:
            return NotImplemented
        return (self.start, self.end) >= theirs

    def __eq__(self, other):
        theirs = self._bounds(other)
        if theirs is None:
            return NotImplemented
        return (self.start, self.end) == theirs

    def __ne__(self, other):
        theirs = self._bounds(other)
        if theirs is None:
            return NotImplemented
        return (self.start, self.end) != theirs

    def intersects(self, other):
        """Return True if two Interval objects intersect.

        Intervals that merely touch (one ends where the other starts)
        do not intersect.  Raises TypeError if 'other' is not an
        Interval.
        """
        if not isinstance(other, Interval):
            raise TypeError("need an Interval")
        if self.end <= other.start or self.start >= other.end:
            return False
        return True

    def subset(self, start, end):
        """Return a new Interval that is a subset of this one.

        Raises IntervalError if [start, end) is not contained in this
        interval.
        """
        # A subclass that tracks additional data might override this.
        if start < self.start or end > self.end:
            raise IntervalError("not a subset")
        return Interval(start, end)
|  |  | ||||||
|  |  | ||||||
def _interval_math_helper(a, b, op, subset=True):
    """Shared engine behind set_difference and intersection.

    Walks the start and end points of the intervals in 'a' and 'b' in
    merged order, tracking whether the current position is inside an
    interval of each.  op(in_a, in_b) decides whether the position
    belongs to the output.  Each output range is yielded as a subset
    of the most recently started interval of 'a', or as a fresh
    Interval when subset is False.
    """
    # Tags that let us tell the four kinds of boundary apart once the
    # two streams have been merged.
    A_START, B_START, A_END, B_END = 0, 1, 2, 3

    def tagged_edges(intervals, start_tag, end_tag):
        for ival in intervals:
            yield ival.start, start_tag, ival
            yield ival.end, end_tag, ival

    edges = nilmdb.utils.iterator.imerge(
        tagged_edges(iter(a), A_START, A_END),
        tagged_edges(iter(b), B_START, B_END))

    # Walk the boundaries in order; at each one update the "inside"
    # flags and open or close an output range as 'op' dictates.
    current_a = None
    inside_a = False
    inside_b = False
    range_start = None
    for (stamp, tag, ival) in edges:
        if tag == A_START:
            current_a = ival
            inside_a = True
        elif tag == B_START:
            inside_b = True
        elif tag == A_END:
            inside_a = False
        else:  # tag == B_END
            inside_b = False

        if op(inside_a, inside_b):
            if range_start is None:
                range_start = stamp
            continue

        # Not included here: close any open range, skipping empty ones
        if range_start is not None and range_start != stamp:
            if subset:
                yield current_a.subset(range_start, stamp)
            else:
                yield Interval(range_start, stamp)
        range_start = None
|  |  | ||||||
|  |  | ||||||
def set_difference(a, b):
    """
    Compute the difference (a \\ b) between the intervals in 'a' and
    the intervals in 'b'; i.e., the ranges that are present in 'a'
    but not in 'b'.

    'a' and 'b' must both be iterables.

    Returns a generator that yields each interval in turn.
    Output intervals are built as subsets of the intervals in the
    first argument (a).
    """
    def only_in_a(in_a, in_b):
        return in_a and not in_b
    return _interval_math_helper(a, b, only_in_a)
|  |  | ||||||
|  |  | ||||||
def intersection(a, b):
    """
    Compute the ranges covered by both the intervals in 'a' and the
    intervals in 'b'.

    'a' and 'b' must both be iterables.

    Returns a generator that yields each interval in turn.
    Output intervals are built as subsets of the intervals in the
    first argument (a).
    """
    def in_both(in_a, in_b):
        return in_a and in_b
    return _interval_math_helper(a, b, in_both)
|  |  | ||||||
|  |  | ||||||
def optimize(it):
    """
    Join intervals from the iterable 'it' that are adjacent in time
    (one ends exactly where the next starts) and return a generator
    that yields the joined intervals.

    Joining is done by moving the start of the later interval back, so
    the interval objects supplied by 'it' are modified in place.
    """
    held = None
    for current in it:
        if held is not None:
            if held.end != current.start:
                # Gap (or overlap): the held interval is final
                yield held
            else:
                # Adjacent: absorb the held interval into this one
                current.start = held.start
        held = current
    if held is not None:
        yield held
							
								
								
									
										38
									
								
								nilmdb/utils/iterator.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								nilmdb/utils/iterator.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,38 @@ | |||||||
|  | # Misc iterator tools | ||||||
|  |  | ||||||
|  | # Iterator merging, based on http://code.activestate.com/recipes/491285/ | ||||||
|  | import heapq | ||||||
|  |  | ||||||
|  |  | ||||||
def imerge(*iterables):
    '''Merge multiple sorted inputs into a single sorted output.

    Equivalent to:  sorted(itertools.chain(*iterables))
    but evaluated lazily, one element at a time.

    >>> list(imerge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]))
    [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]

    Each argument must be an iterable that is already sorted
    (smallest to largest).  Returns a generator over the merged
    values.  When several inputs hold equal values, they are emitted
    in the order of the inputs they came from.
    '''
    # The hand-rolled heap of [value, next_method] pairs breaks on
    # Python 3 whenever two inputs hold equal values: the list
    # comparison falls through to the bound methods, which are not
    # orderable, and raises TypeError.  It also depended on the private
    # heapq._siftup helper.  heapq.merge implements the same algorithm
    # with a proper tie-breaker, so delegate to it.
    yield from heapq.merge(*iterables)
							
								
								
									
										22
									
								
								nilmdb/utils/lock.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								nilmdb/utils/lock.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | |||||||
|  | # File locking | ||||||
|  |  | ||||||
|  | import fcntl | ||||||
|  | import errno | ||||||
|  |  | ||||||
|  |  | ||||||
def exclusive_lock(f):
    """Try to take an exclusive, non-blocking flock() on the open
    file object 'f'.

    Returns True if the lock was acquired, or False if the attempt
    failed with EACCES or EAGAIN.  Any other I/O error is re-raised."""
    mode = fcntl.LOCK_EX | fcntl.LOCK_NB
    try:
        fcntl.flock(f.fileno(), mode)
    except IOError as err:
        if err.errno not in (errno.EACCES, errno.EAGAIN):
            raise
        return False
    else:
        return True
|  |  | ||||||
|  |  | ||||||
def exclusive_unlock(f):
    """Drop the flock() lock held on the open file object 'f'."""
    descriptor = f.fileno()
    fcntl.flock(descriptor, fcntl.LOCK_UN)
							
								
								
									
										80
									
								
								nilmdb/utils/lrucache.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								nilmdb/utils/lrucache.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,80 @@ | |||||||
|  | # Memoize a function's return value with a least-recently-used cache | ||||||
|  | # Based on: | ||||||
|  | #   http://code.activestate.com/recipes/498245-lru-and-lfu-cache-decorators/ | ||||||
|  | # with added 'destructor' functionality. | ||||||
|  |  | ||||||
|  | import collections | ||||||
|  | import decorator | ||||||
|  |  | ||||||
|  |  | ||||||
def lru_cache(size=10, onremove=None, keys=slice(None)):
    """Least-recently-used cache decorator.

    @lru_cache(size=10, onremove=None)
    def f(...):
        pass

    Memoizes the return value of the decorated function, keeping up
    to 'size' entries.  Only positional arguments are supported;
    passing keyword arguments raises NotImplementedError.  'keys' is
    a slice that selects which positional arguments form the cache
    key.

    If 'onremove' is given, it is called with each value as that
    value is evicted from the cache.

    The decorated function gains these attributes:
      f.cache_remove(...)    evict the entry whose key is the given
                             arguments, if present
      f.cache_remove_all()   evict every entry
      f.cache_info()         return (hits, misses)
    """

    def decorate(func):
        # Entries are kept oldest-use first, newest-use last.
        cache = collections.OrderedDict()
        # Unique marker meaning "no entry for this key".
        not_cached = object()

        def evict(value):
            if onremove:
                onremove(value)

        def wrapper(orig, *args, **kwargs):
            if kwargs:
                raise NotImplementedError("kwargs not supported")
            key = args[keys]
            # Take the entry out; it is re-added at the end below so
            # that it becomes the most recently used one.
            value = cache.pop(key, not_cached)
            if value is not_cached:
                value = orig(*args)
                orig.cache_misses += 1
                if len(cache) >= size:
                    # Make room by dropping the least recently used.
                    _, oldest = cache.popitem(last=False)
                    evict(oldest)
            else:
                orig.cache_hits += 1
            cache[key] = value
            return value

        def cache_remove(*args):
            """Remove the described key from this cache, if present."""
            if args in cache:
                evict(cache.pop(args))
                return
            # Not found: if the argument count can't possibly match
            # the keys we store, the caller has made a mistake.
            if cache and len(args) != len(next(iter(cache.keys()))):
                raise KeyError("trying to remove from LRU cache, but "
                               "number of arguments doesn't match the "
                               "cache key length")

        def cache_remove_all():
            nonlocal cache
            for value in cache.values():
                evict(value)
            cache = collections.OrderedDict()

        def cache_info():
            return (func.cache_hits, func.cache_misses)

        new = decorator.decorator(wrapper, func)
        # Statistics live on the original function object, which is
        # what 'wrapper' receives as 'orig'.
        func.cache_hits = 0
        func.cache_misses = 0
        new.cache_info = cache_info
        new.cache_remove = cache_remove
        new.cache_remove_all = cache_remove_all
        return new

    return decorate
							
								
								
									
										71
									
								
								nilmdb/utils/mustclose.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								nilmdb/utils/mustclose.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,71 @@ | |||||||
|  | import sys | ||||||
|  | import inspect | ||||||
|  | import decorator | ||||||
|  | from nilmdb.utils.printf import fprintf | ||||||
|  |  | ||||||
|  |  | ||||||
def must_close(errorfile=sys.stderr, wrap_verify=False):
    """Class decorator that prints a warning to 'errorfile' when an
    instance is deleted without its close() method having been called.

    If 'wrap_verify' is True, the class's other methods and functions
    (everything except __init__ and __del__) are wrapped with a
    verifier that raises AssertionError when they are called on an
    instance that has already been closed."""
    def class_decorator(cls):

        def is_method_or_function(candidate):
            return (inspect.isfunction(candidate) or
                    inspect.ismethod(candidate))

        def wrap_class_method(wrapper):
            # The wrapper's own name selects which class attribute
            # gets wrapped.
            name = wrapper.__name__
            try:
                orig = getattr(cls, name)
            except AttributeError:
                # Class has no such attribute; wrap a do-nothing stub.
                orig = lambda x: None
            if not is_method_or_function(orig):
                return
            setattr(cls, name, decorator.decorator(wrapper, orig))

        @wrap_class_method
        def __init__(orig, self, *args, **kwargs):
            result = orig(self, *args, **kwargs)
            # Mark the instance as open only after the real __init__
            # has finished.
            self.__dict__.update(_must_close=True,
                                 _must_close_initialized=True)
            return result

        @wrap_class_method
        def __del__(orig, self, *args, **kwargs):
            # Nothing may escape from a destructor, so every error is
            # deliberately swallowed here.
            try:
                still_open = "_must_close" in self.__dict__
                if still_open:
                    fprintf(errorfile, "error: %s.close() wasn't called!\n",
                            self.__class__.__name__)
                return orig(self, *args, **kwargs)
            except BaseException:
                return None

        @wrap_class_method
        def close(orig, self, *args, **kwargs):
            # Clear the "still open" marker before running the real close.
            if "_must_close" in self.__dict__:
                del self._must_close
            return orig(self, *args, **kwargs)

        # Optionally wrap all other functions
        def verifier(orig, self, *args, **kwargs):
            state = self.__dict__
            # Closed means: __init__ completed, and the open marker
            # has since been removed by close().
            if ("_must_close_initialized" in state and
                    "_must_close" not in state):
                raise AssertionError("called " + str(orig) + " after close")
            return orig(self, *args, **kwargs)

        if wrap_verify:
            members = inspect.getmembers(cls, is_method_or_function)
            for (name, method) in members:
                # Skip some methods
                if name in ("__del__", "__init__"):
                    continue
                # Wrap the underlying function, not a bound method
                func = method.__func__ if inspect.ismethod(method) else method
                setattr(cls, name, decorator.decorator(verifier, func))

        return cls
    return class_decorator
							
								
								
									
										13
									
								
								nilmdb/utils/printf.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								nilmdb/utils/printf.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,13 @@ | |||||||
|  | """printf, fprintf, sprintf""" | ||||||
|  |  | ||||||
|  |  | ||||||
def printf(_str, *args):
    """Format '_str' with 'args' using the % operator and print the
    result to standard output, without adding a trailing newline."""
    text = _str % args
    print(text, end='')
|  |  | ||||||
|  |  | ||||||
def fprintf(_file, _str, *args):
    """Format '_str' with 'args' using the % operator and print the
    result to the file object '_file', without adding a trailing
    newline."""
    text = _str % args
    print(text, file=_file, end='')
|  |  | ||||||
|  |  | ||||||
def sprintf(_str, *args):
    """Format '_str' with 'args' using the % operator and return the
    result."""
    formatted = _str % args
    return formatted
							
								
								
									
										134
									
								
								nilmdb/utils/serializer.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										134
									
								
								nilmdb/utils/serializer.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,134 @@ | |||||||
|  | import queue | ||||||
|  | import threading | ||||||
|  | import sys | ||||||
|  |  | ||||||
|  | # This file provides a class that will wrap an object and serialize | ||||||
|  | # all calls to its methods.  All calls to that object will be queued | ||||||
|  | # and executed from a single thread, regardless of which thread makes | ||||||
|  | # the call. | ||||||
|  |  | ||||||
|  | # Based partially on http://stackoverflow.com/questions/2642515/ | ||||||
|  |  | ||||||
|  |  | ||||||
class SerializerThread(threading.Thread):
    """Worker thread that pulls call requests off a queue, executes
    them one at a time, and sends back each outcome.

    Each request is a tuple (result_queue, func, args, kwargs).  The
    outcome is put on result_queue as (exception, result), where
    'exception' is the sys.exc_info() triple if the call raised, and
    None otherwise.  A request whose result_queue is None tells the
    thread to exit."""
    def __init__(self, classname, call_queue):
        super().__init__()
        # Prefix the default thread name so the thread is easy to identify.
        self.name = "-".join(("Serializer", classname, self.name))
        self.call_queue = call_queue

    def run(self):
        while True:
            result_queue, func, args, kwargs = self.call_queue.get()
            if result_queue is None:
                # Termination request
                return
            try:
                outcome = (None, func(*args, **kwargs))  # wrapped
            except BaseException:
                # Capture everything; it is re-raised in the caller.
                outcome = (sys.exc_info(), None)
            # Drop our references to the call before handing back the
            # outcome, so we don't unnecessarily keep those objects
            # alive while blocked waiting for the next request.
            del func, args, kwargs
            result_queue.put(outcome)
            del outcome
|  |  | ||||||
|  |  | ||||||
def serializer_proxy(obj_or_type):
    """Wrap the given object or type in a SerializerObjectProxy.

    The returned proxy forwards method calls and attribute lookups to
    the wrapped object.  Forwarded requests, including instantiation
    when a type was given, are executed on one dedicated
    SerializerThread, so calls from different threads are serialized.
    """
    class SerializerCallProxy():
        """Callable that runs 'func' on the serializer thread and
        returns its result (or re-raises its exception) in the
        calling thread."""
        def __init__(self, call_queue, func, objectproxy):
            self.call_queue = call_queue
            self.func = func
            # Keep the object proxy alive: if it were collected, its
            # __del__ would tell the thread to exit before this call
            # has been made.
            self.objectproxy = objectproxy

        def __call__(self, *args, **kwargs):
            reply = queue.Queue()
            self.call_queue.put((reply, self.func, args, kwargs))
            # Block until the serializer thread has run the call.
            (exc_info, result) = reply.get()
            if exc_info is not None:
                raise exc_info[1].with_traceback(exc_info[2])
            return result

    class SerializerObjectProxy():
        def __init__(self, obj_or_type, *args, **kwargs):
            self.__object = obj_or_type
            if isinstance(obj_or_type, type):
                classname = obj_or_type.__name__
            else:
                classname = obj_or_type.__class__.__name__
            self.__call_queue = queue.Queue()
            worker = SerializerThread(classname, self.__call_queue)
            worker.daemon = True
            self.__thread = worker
            worker.start()
            self._thread_safe = True

        def __getattr__(self, key):
            # We have to look at the attribute once, from the caller's
            # thread, to find out whether it is callable.  That means
            # the wrapped object's attribute lookup may run in an
            # unsafe way here.
            attr = getattr(self.__object, key)
            if callable(attr):
                # Hand back something that will make the call through
                # the serializer when invoked.
                return SerializerCallProxy(self.__call_queue, attr, self)
            # Plain value: fetch it again from within the serializer
            # thread and return that.  Because of the forced ordering,
            # it may differ from the "attr" we just read.
            getter = SerializerCallProxy(self.__call_queue, getattr, self)
            return getter(self.__object, key)

        # Iteration support: __iter__() obtains the wrapped object's
        # iterator (through the serializer) and returns this proxy;
        # each next() then advances that iterator through the
        # serializer as well.
        def __iter__(self):
            make_iter = getattr(self.__object, "__iter__")
            self.__iter = SerializerCallProxy(self.__call_queue,
                                              make_iter, self)()
            return self

        def __next__(self):
            advance = SerializerCallProxy(self.__call_queue,
                                          self.__iter.__next__, self)
            return advance()

        def __getitem__(self, key):
            return self.__getattr__("__getitem__")(key)

        def __call__(self, *args, **kwargs):
            """Call this to instantiate the type, if a type was passed
            to serializer_proxy.  Otherwise, pass the call through."""
            call = SerializerCallProxy(self.__call_queue,
                                       self.__object, self)
            ret = call(*args, **kwargs)
            if not isinstance(self.__object, type):
                return ret
            # Instantiation: from now on, proxy the new instance.
            self.__object = ret
            return self

        def __del__(self):
            # Ask the thread to exit, but don't wait for it.  Errors
            # are ignored since this runs from a destructor.
            try:
                self.__call_queue.put((None, None, None, None))
            except BaseException:
                pass

    return SerializerObjectProxy(obj_or_type)
							
								
								
									
										19
									
								
								nilmdb/utils/sort.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								nilmdb/utils/sort.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,19 @@ | |||||||
|  | import re | ||||||
|  |  | ||||||
|  |  | ||||||
def sort_human(items, key=None):
    """Human-friendly sort (/stream/2 before /stream/10).

    Returns a new sorted list of 'items'.  Each item (or key(item),
    if 'key' is given) must be a string.  It is split into alternating
    text and ASCII-digit chunks; digit chunks are compared as
    integers and text chunks as strings.
    """
    def human_key(item):
        text = key(item) if key else item
        # With a capturing group, re.split returns the text between
        # matches at even indices and the matched digit runs at odd
        # indices.
        chunks = re.split(r'([0-9]+)', text)
        # Convert only the digit runs.  Trying int() on the text
        # chunks too would turn a chunk such as a lone non-ASCII digit
        # into an int, which then can't be compared with the string
        # chunks of other items.
        return [int(chunk) if index % 2 else chunk
                for (index, chunk) in enumerate(chunks)]

    return sorted(items, key=human_key)
							
								
								
									
										97
									
								
								nilmdb/utils/threadsafety.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								nilmdb/utils/threadsafety.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,97 @@ | |||||||
|  | import threading | ||||||
|  | from nilmdb.utils.printf import sprintf | ||||||
|  |  | ||||||
|  |  | ||||||
def verify_proxy(obj_or_type, check_thread=True,
                 check_concurrent=True):
    """Wrap the given object or type in a VerifyObjectProxy.

    The returned proxy forwards method calls and attribute lookups to
    the given object, checking each forwarded call as follows and
    raising AssertionError on failure:

    check_thread = True     # Fail if a call comes from a different
                            # thread than the first call did.
    check_concurrent = True # Fail if a call is made while another
                            # call through this proxy is still running.
    """
    class Namespace():
        pass

    class VerifyCallProxy():
        def __init__(self, func, parent_namespace):
            self.func = func
            self.parent_namespace = parent_namespace

        def __call__(self, *args, **kwargs):
            ns = self.parent_namespace
            caller = threading.current_thread()
            callee = getattr(self.func, "__name__", "???")

            # The first call through the proxy determines the owner thread.
            if ns.thread is None:
                ns.thread = caller
                ns.thread_callee = callee

            if check_thread and ns.thread != caller:
                raise AssertionError(
                    sprintf("unsafe threading: %s called %s.%s,"
                            " but %s called %s.%s",
                            ns.thread.name, ns.classname, ns.thread_callee,
                            caller.name, ns.classname, callee))

            holding_lock = False
            if check_concurrent:
                # Non-blocking acquire: failure means another call is
                # currently in progress.
                if not ns.concur_lock.acquire(False):
                    raise AssertionError(
                        sprintf("unsafe concurrency: %s called %s.%s "
                                "while %s is still in %s.%s",
                                caller.name, ns.classname, callee,
                                ns.concur_tname, ns.classname,
                                ns.concur_callee))
                # Record who holds the lock, for the error message above.
                ns.concur_tname = caller.name
                ns.concur_callee = callee
                holding_lock = True

            try:
                return self.func(*args, **kwargs)
            finally:
                if holding_lock:
                    ns.concur_lock.release()

    class VerifyObjectProxy():
        def __init__(self, obj_or_type, *args, **kwargs):
            # State shared by every call proxy created from this object
            ns = Namespace()
            self.__ns = ns
            ns.thread = None
            ns.thread_callee = None
            ns.concur_lock = threading.Lock()
            ns.concur_tname = None
            ns.concur_callee = None
            self.__obj = obj_or_type
            if isinstance(obj_or_type, type):
                ns.classname = self.__obj.__name__
            else:
                ns.classname = self.__obj.__class__.__name__

        def __getattr__(self, key):
            attr = getattr(self.__obj, key)
            if callable(attr):
                return VerifyCallProxy(attr, self.__ns)
            # Plain value: run the lookup itself through the checks.
            return VerifyCallProxy(getattr, self.__ns)(self.__obj, key)

        def __call__(self, *args, **kwargs):
            """Call this to instantiate the type, if a type was passed
            to verify_proxy.  Otherwise, pass the call through."""
            ret = VerifyCallProxy(self.__obj, self.__ns)(*args, **kwargs)
            if not isinstance(self.__obj, type):
                return ret
            # Instantiation: from now on, proxy the new instance.
            self.__obj = ret
            return self

    return VerifyObjectProxy(obj_or_type)
							
								
								
									
										148
									
								
								nilmdb/utils/time.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										148
									
								
								nilmdb/utils/time.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,148 @@ | |||||||
|  | import re | ||||||
|  | import time | ||||||
|  | import datetime_tz | ||||||
|  |  | ||||||
|  | # Range | ||||||
|  | min_timestamp = (-2**63) | ||||||
|  | max_timestamp = (2**63 - 1) | ||||||
|  |  | ||||||
|  | # Smallest representable step | ||||||
|  | epsilon = 1 | ||||||
|  |  | ||||||
|  |  | ||||||
def string_to_timestamp(string):
    """Parse a string holding an integer number of microseconds since
    epoch, and return it as an integer."""
    try:
        # The usual case: something like "1234567890123456"
        value = int(string)
    except ValueError:
        # Fall back to float parsing so that "1234567890123456.0"
        # is accepted too; the result is rounded to an integer.
        value = int(round(float(string)))
    return value
|  |  | ||||||
|  |  | ||||||
def timestamp_to_string(timestamp):
    """Convert a timestamp (integer microseconds since epoch) to a
    string.  A float timestamp is rounded to an integer first."""
    value = int(round(timestamp)) if isinstance(timestamp, float) else timestamp
    return str(value)
|  |  | ||||||
|  |  | ||||||
def timestamp_to_bytes(timestamp):
    """Convert a timestamp (integer microseconds since epoch) to a
    Python bytes object holding its UTF-8 encoded string form"""
    text = timestamp_to_string(timestamp)
    return text.encode('utf-8')
|  |  | ||||||
|  |  | ||||||
def timestamp_to_human(timestamp):
    """Convert a timestamp (integer microseconds since epoch) to a
    human-readable string, using the local timezone for display
    (e.g. from the TZ env var).

    The two range limits are shown as "(minimum)" and "(maximum)"
    rather than as dates."""
    if timestamp == min_timestamp:
        label = "(minimum)"
    elif timestamp == max_timestamp:
        label = "(maximum)"
    else:
        unix = timestamp_to_unix(timestamp)
        moment = datetime_tz.datetime_tz.fromtimestamp(unix)
        label = moment.strftime("%a, %d %b %Y %H:%M:%S.%f %z")
    return label
|  |  | ||||||
|  |  | ||||||
def unix_to_timestamp(unix):
    """Convert a Unix timestamp (floating point seconds since epoch)
    into a NILM timestamp (integer microseconds since epoch),
    rounding to the nearest microsecond"""
    microseconds = unix * 1e6
    return int(round(microseconds))
|  |  | ||||||
|  |  | ||||||
def timestamp_to_unix(timestamp):
    """Convert a NILM timestamp (integer microseconds since epoch)
    into a Unix timestamp (floating point seconds since epoch)"""
    microseconds_per_second = 1e6
    return timestamp / microseconds_per_second
|  |  | ||||||
|  |  | ||||||
|  | seconds_to_timestamp = unix_to_timestamp | ||||||
|  | timestamp_to_seconds = timestamp_to_unix | ||||||
|  |  | ||||||
|  |  | ||||||
def rate_to_period(hz, cycles=1):
    """Convert a rate (in Hz) to the duration of 'cycles' cycles at
    that rate, in timestamp units.  Returns an integer."""
    # Timestamp units spanned by 'cycles' seconds, divided by the rate
    ticks = unix_to_timestamp(cycles)
    return int(round(ticks / float(hz)))
|  |  | ||||||
|  |  | ||||||
|  | def parse_time(toparse): | ||||||
|  |     """ | ||||||
|  |     Parse a free-form time string and return a nilmdb timestamp | ||||||
|  |     (integer microseconds since epoch).  If the string doesn't contain a | ||||||
|  |     timestamp, the current local timezone is assumed (e.g. from the TZ | ||||||
|  |     env var). | ||||||
|  |     """ | ||||||
|  |     if toparse == "min": | ||||||
|  |         return min_timestamp | ||||||
|  |     if toparse == "max": | ||||||
|  |         return max_timestamp | ||||||
|  |  | ||||||
|  |     # If it starts with @, treat it as a NILM timestamp | ||||||
|  |     # (integer microseconds since epoch) | ||||||
|  |     try: | ||||||
|  |         if toparse[0] == '@': | ||||||
|  |             return int(toparse[1:]) | ||||||
|  |     except (ValueError, KeyError, IndexError): | ||||||
|  |         pass | ||||||
|  |  | ||||||
|  |     # If string isn't "now" and doesn't contain at least 4 digits, | ||||||
|  |     # consider it invalid.  smartparse might otherwise accept | ||||||
|  |     # empty strings and strings with just separators. | ||||||
|  |     if toparse != "now" and len(re.findall(r"\d", toparse)) < 4: | ||||||
|  |         raise ValueError("not enough digits for a timestamp") | ||||||
|  |  | ||||||
|  |     # Try to just parse the time as given | ||||||
|  |     try: | ||||||
|  |         return unix_to_timestamp(datetime_tz.datetime_tz. | ||||||
|  |                                  smartparse(toparse).totimestamp()) | ||||||
|  |     except (ValueError, OverflowError, TypeError): | ||||||
|  |         pass | ||||||
|  |  | ||||||
|  |     # If it's parseable as a float, treat it as a Unix or NILM | ||||||
|  |     # timestamp based on its range. | ||||||
|  |     try: | ||||||
|  |         val = float(toparse) | ||||||
|  |         # range is from about year 2001 - 2128 | ||||||
|  |         if 1e9 < val < 5e9: | ||||||
|  |             return unix_to_timestamp(val) | ||||||
|  |         if 1e15 < val < 5e15: | ||||||
|  |             return val | ||||||
|  |     except ValueError: | ||||||
|  |         pass | ||||||
|  |  | ||||||
|  |     # Try to extract a substring in a condensed format that we expect | ||||||
|  |     # to see in a filename or header comment | ||||||
|  |     res = re.search(r"(^|[^\d])("            # non-numeric or SOL | ||||||
|  |                     r"(199\d|2\d\d\d)"       # year | ||||||
|  |                     r"[-/]?"                 # separator | ||||||
|  |                     r"(0[1-9]|1[012])"       # month | ||||||
|  |                     r"[-/]?"                 # separator | ||||||
|  |                     r"([012]\d|3[01])"       # day | ||||||
|  |                     r"[-T ]?"                # separator | ||||||
|  |                     r"([01]\d|2[0-3])"       # hour | ||||||
|  |                     r"[:]?"                  # separator | ||||||
|  |                     r"([0-5]\d)"             # minute | ||||||
|  |                     r"[:]?"                  # separator | ||||||
|  |                     r"([0-5]\d)?"            # second | ||||||
|  |                     r"([-+]\d\d\d\d)?"       # timezone | ||||||
|  |                     r")", toparse) | ||||||
|  |     if res is not None: | ||||||
|  |         try: | ||||||
|  |             return unix_to_timestamp(datetime_tz.datetime_tz. | ||||||
|  |                                      smartparse(res.group(2)).totimestamp()) | ||||||
|  |         except ValueError: | ||||||
|  |             pass | ||||||
|  |  | ||||||
|  |     # Could also try to successively parse substrings, but let's | ||||||
|  |     # just give up for now. | ||||||
|  |     raise ValueError("unable to parse timestamp") | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def now(): | ||||||
|  |     """Return current timestamp""" | ||||||
|  |     return unix_to_timestamp(time.time()) | ||||||
							
								
								
									
										22
									
								
								nilmdb/utils/timer.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								nilmdb/utils/timer.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | |||||||
|  | # -*- coding: utf-8 -*- | ||||||
|  |  | ||||||
|  | # Simple timer to time a block of code, for optimization debugging | ||||||
|  | # use like: | ||||||
|  | #   with nilmdb.utils.Timer("flush"): | ||||||
|  | #       foo.flush() | ||||||
|  |  | ||||||
|  | import contextlib | ||||||
|  | import time | ||||||
|  |  | ||||||
|  |  | ||||||
|  | @contextlib.contextmanager | ||||||
|  | def Timer(name=None, tosyslog=False): | ||||||
|  |     start = time.time() | ||||||
|  |     yield | ||||||
|  |     elapsed = int((time.time() - start) * 1000) | ||||||
|  |     msg = (name or 'elapsed') + ": " + str(elapsed) + " ms" | ||||||
|  |     if tosyslog: | ||||||
|  |         import syslog | ||||||
|  |         syslog.syslog(msg) | ||||||
|  |     else: | ||||||
|  |         print(msg) | ||||||
							
								
								
									
										103
									
								
								nilmdb/utils/timestamper.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								nilmdb/utils/timestamper.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,103 @@ | |||||||
|  | """File-like objects that add timestamps to the input lines""" | ||||||
|  |  | ||||||
|  | from nilmdb.utils.printf import sprintf | ||||||
|  | import nilmdb.utils.time | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class Timestamper(): | ||||||
|  |     """A file-like object that adds timestamps to lines of an input file.""" | ||||||
|  |     def __init__(self, infile, ts_iter): | ||||||
|  |         """infile: filename, or another file-like object | ||||||
|  |            ts_iter: iterator that returns a timestamp (as bytes) to | ||||||
|  |            prepend to each non-comment line of the file""" | ||||||
|  |         if isinstance(infile, str): | ||||||
|  |             self.file = open(infile, "rb") | ||||||
|  |         else: | ||||||
|  |             self.file = infile | ||||||
|  |         self.ts_iter = ts_iter | ||||||
|  |  | ||||||
|  |     def close(self): | ||||||
|  |         self.file.close() | ||||||
|  |  | ||||||
|  |     def readline(self, *args): | ||||||
|  |         while True: | ||||||
|  |             line = self.file.readline(*args) | ||||||
|  |             if not line: | ||||||
|  |                 return b"" | ||||||
|  |             if line[0:1] == b'#': | ||||||
|  |                 continue | ||||||
|  |             # For some reason, coverage on python 3.8 reports that | ||||||
|  |             # we never hit this break, even though we definitely do. | ||||||
|  |             break  # pragma: no cover | ||||||
|  |         try: | ||||||
|  |             return next(self.ts_iter) + line | ||||||
|  |         except StopIteration: | ||||||
|  |             return b"" | ||||||
|  |  | ||||||
|  |     def readlines(self, size=None): | ||||||
|  |         out = b"" | ||||||
|  |         while True: | ||||||
|  |             line = self.readline() | ||||||
|  |             out += line | ||||||
|  |             if not line or (size and len(out) >= size): | ||||||
|  |                 break | ||||||
|  |         return out | ||||||
|  |  | ||||||
|  |     def __iter__(self): | ||||||
|  |         return self | ||||||
|  |  | ||||||
|  |     def __next__(self): | ||||||
|  |         result = self.readline() | ||||||
|  |         if not result: | ||||||
|  |             raise StopIteration | ||||||
|  |         return result | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class TimestamperRate(Timestamper): | ||||||
|  |     """Timestamper that uses a start time and a fixed rate""" | ||||||
|  |     def __init__(self, infile, start, rate, end=None): | ||||||
|  |         """ | ||||||
|  |         infile: file name or object | ||||||
|  |  | ||||||
|  |         start: NILM timestamp (microseconds since epoch) of the first value | ||||||
|  |  | ||||||
|  |         rate: 1/rate is added to the timestamp for each line | ||||||
|  |  | ||||||
|  |         end: If specified, raise StopIteration before outputting a value | ||||||
|  |              greater than or equal to this.""" | ||||||
|  |         timestamp_to_bytes = nilmdb.utils.time.timestamp_to_bytes | ||||||
|  |         rate_to_period = nilmdb.utils.time.rate_to_period | ||||||
|  |  | ||||||
|  |         def iterator(start, rate, end): | ||||||
|  |             n = 0 | ||||||
|  |             rate = float(rate) | ||||||
|  |             while True: | ||||||
|  |                 now = start + rate_to_period(rate, n) | ||||||
|  |                 if end and now >= end: | ||||||
|  |                     return | ||||||
|  |                 yield timestamp_to_bytes(now) + b" " | ||||||
|  |                 n += 1 | ||||||
|  |         Timestamper.__init__(self, infile, iterator(start, rate, end)) | ||||||
|  |         self.start = start | ||||||
|  |         self.rate = rate | ||||||
|  |  | ||||||
|  |     def __str__(self): | ||||||
|  |         return sprintf("TimestamperRate(..., start=\"%s\", rate=%g)", | ||||||
|  |                        nilmdb.utils.time.timestamp_to_human(self.start), | ||||||
|  |                        self.rate) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class TimestamperNow(Timestamper): | ||||||
|  |     """Timestamper that uses current time""" | ||||||
|  |     def __init__(self, infile): | ||||||
|  |         timestamp_to_bytes = nilmdb.utils.time.timestamp_to_bytes | ||||||
|  |         get_now = nilmdb.utils.time.now | ||||||
|  |  | ||||||
|  |         def iterator(): | ||||||
|  |             while True: | ||||||
|  |                 yield timestamp_to_bytes(get_now()) + b" " | ||||||
|  |  | ||||||
|  |         Timestamper.__init__(self, infile, iterator()) | ||||||
|  |  | ||||||
|  |     def __str__(self): | ||||||
|  |         return "TimestamperNow(...)" | ||||||
| @@ -1,5 +0,0 @@ | |||||||
| all: |  | ||||||
| 	time python test-indexed-read.py |  | ||||||
|  |  | ||||||
| clean: |  | ||||||
| 	rm -f *pyc |  | ||||||
| @@ -1,2 +0,0 @@ | |||||||
| New version from: |  | ||||||
|   http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=641485#15 |  | ||||||
| @@ -1,12 +0,0 @@ | |||||||
| - Make CherryPy server that can handle simple GET/POST, |  | ||||||
|   and a httplib client that can talk to that server. |  | ||||||
|   Steps: |  | ||||||
|   - Make server handle GET |  | ||||||
|   - Make client send request, get response |  | ||||||
|   - Add request streaming to server |  | ||||||
|   - Add request streaming to client |  | ||||||
|   - Make server handle POST |  | ||||||
|   - Make client send request, get response |  | ||||||
|   - Add request streaming to server |  | ||||||
|   - Add request streaming to client |  | ||||||
|   - Integrate into a server process that also keeps database open. |  | ||||||
| @@ -1,3 +0,0 @@ | |||||||
| Indexing time64 doesn't seem to work -- needed to do "time >= 1243052015" even though the actual database times |  | ||||||
| should be something like 1243052015.847000.  Let's switch to just using a 64-bit integer counting e.g. |  | ||||||
| microseconds since 1970-01-01  |  | ||||||
| @@ -1,3 +0,0 @@ | |||||||
| timestamp > 1243052015 |  | ||||||
| took 394.5 minutes in vitables |  | ||||||
| (2340 rows matched) |  | ||||||
| @@ -1,53 +0,0 @@ | |||||||
| import sys |  | ||||||
| import tables |  | ||||||
| import nilmdb |  | ||||||
|  |  | ||||||
| try: |  | ||||||
|     import cherrypy |  | ||||||
|     cherrypy.tools.json_out |  | ||||||
| except: |  | ||||||
|     sys.stderr.write("Cherrypy 3.2+ required\n") |  | ||||||
|     sys.exit(1) |  | ||||||
|  |  | ||||||
| class NilmApp: |  | ||||||
|     def __init__(self, db): |  | ||||||
|         self.db = db |  | ||||||
|  |  | ||||||
| class Root(NilmApp): |  | ||||||
|     """NILM Database""" |  | ||||||
|  |  | ||||||
|     server_version = "1.0" |  | ||||||
|  |  | ||||||
|     @cherrypy.expose |  | ||||||
|     def index(self): |  | ||||||
|         raise cherrypy.NotFound() |  | ||||||
|  |  | ||||||
|     @cherrypy.expose |  | ||||||
|     def favicon_ico(self): |  | ||||||
|         raise cherrypy.NotFound() |  | ||||||
|  |  | ||||||
|     @cherrypy.expose |  | ||||||
|     @cherrypy.tools.json_out() |  | ||||||
|     def version(self): |  | ||||||
|         return self.server_version |  | ||||||
|  |  | ||||||
| class Stream(NilmApp): |  | ||||||
|     """Stream operations""" |  | ||||||
|  |  | ||||||
|     @cherrypy.expose |  | ||||||
|     @cherrypy.tools.json_out() |  | ||||||
|     def list(self): |  | ||||||
|         return  |  | ||||||
|      |  | ||||||
| cherrypy.config.update({ |  | ||||||
|     'server.socket_host': '127.0.0.1', |  | ||||||
|     'server.socket_port': 12380 |  | ||||||
|     }) |  | ||||||
|  |  | ||||||
| db = nilmdb.nilmdb() |  | ||||||
| cherrypy.tree.mount(Root(db), "/") |  | ||||||
| cherrypy.tree.mount(Stream(db), "/stream") |  | ||||||
|  |  | ||||||
| if __name__ == "__main__": |  | ||||||
|     cherrypy.engine.start() |  | ||||||
|     cherrypy.engine.block() |  | ||||||
| @@ -1,16 +0,0 @@ | |||||||
| import tables |  | ||||||
| import numpy |  | ||||||
|  |  | ||||||
| class RawSample(tables.IsDescription): |  | ||||||
|     timestamp = tables.UInt64Col() |  | ||||||
|     voltage   = tables.UInt16Col(shape = 3) |  | ||||||
|     current   = tables.UInt16Col(shape = 3) |  | ||||||
|  |  | ||||||
| h5file = tables.openFile("test.h5", mode = "w", title = "Test") |  | ||||||
| group = h5file.createGroup("/", "raw", "Raw Data") |  | ||||||
| table = h5file.createTable(group, "nilm1", RawSample, "NILM 1") |  | ||||||
|  |  | ||||||
| print repr(h5file) |  | ||||||
|  |  | ||||||
| # write rows |  | ||||||
|  |  | ||||||
| @@ -1,54 +0,0 @@ | |||||||
| #!/usr/bin/python |  | ||||||
|  |  | ||||||
| from tables import * |  | ||||||
| import re |  | ||||||
| import time |  | ||||||
|  |  | ||||||
| # A class to describe our data |  | ||||||
| class PrepData(IsDescription): |  | ||||||
|     timestamp = Int64Col() |  | ||||||
|     p1 = Float32Col() |  | ||||||
|     q1 = Float32Col() |  | ||||||
|     p3 = Float32Col() |  | ||||||
|     q3 = Float32Col() |  | ||||||
|     p5 = Float32Col() |  | ||||||
|     q5 = Float32Col() |  | ||||||
|     p7 = Float32Col() |  | ||||||
|     q7 = Float32Col() |  | ||||||
|  |  | ||||||
| filename = "test.h5" |  | ||||||
| h5file = openFile(filename, mode = "w", title = "NILM Test") |  | ||||||
|  |  | ||||||
| group = h5file.createGroup("/", "newton", "Newton school") |  | ||||||
| table = h5file.createTable(group, "prep", PrepData, "Prep Data", expectedrows = 120 * 86400 * 90) |  | ||||||
|  |  | ||||||
| table.cols.timestamp.createIndex() |  | ||||||
|  |  | ||||||
| for i in range(0, 80): |  | ||||||
|     # Open file |  | ||||||
|     data = open("data/alldata") |  | ||||||
|     count = 0 |  | ||||||
|     oldtime = time.time() |  | ||||||
|     prep = table.row |  | ||||||
|     for line in data: |  | ||||||
|         count = count + 1 |  | ||||||
|         if count % 1000000 == 0: |  | ||||||
|             print str(i) + ": " + str((time.time() - oldtime)) + ", total " + str(count/1000000) + "m lines" |  | ||||||
|             oldtime = time.time() |  | ||||||
|         v = re.split('\s+', line) |  | ||||||
|         prep['timestamp'] = int(v[0]) + 500000000 * i |  | ||||||
|         prep['p1'] = v[1] |  | ||||||
|         prep['q1'] = v[2] |  | ||||||
|         prep['p3'] = v[3] |  | ||||||
|         prep['q3'] = v[4] |  | ||||||
|         prep['p5'] = v[5] |  | ||||||
|         prep['q5'] = v[6] |  | ||||||
|         prep['p7'] = v[7] |  | ||||||
|         prep['q7'] = v[8] |  | ||||||
|         prep.append() |  | ||||||
|     data.close() |  | ||||||
|  |  | ||||||
| h5file.close() |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -1,54 +0,0 @@ | |||||||
| #!/usr/bin/python |  | ||||||
|  |  | ||||||
| from tables import * |  | ||||||
| import re |  | ||||||
| import time |  | ||||||
|  |  | ||||||
| # A class to describe our data |  | ||||||
| class PrepData(IsDescription): |  | ||||||
|     timestamp = Time64Col() |  | ||||||
|     p1 = Float32Col() |  | ||||||
|     q1 = Float32Col() |  | ||||||
|     p3 = Float32Col() |  | ||||||
|     q3 = Float32Col() |  | ||||||
|     p5 = Float32Col() |  | ||||||
|     q5 = Float32Col() |  | ||||||
|     p7 = Float32Col() |  | ||||||
|     q7 = Float32Col() |  | ||||||
|  |  | ||||||
| filename = "test.h5" |  | ||||||
| h5file = openFile(filename, mode = "w", title = "NILM Test") |  | ||||||
|  |  | ||||||
| group = h5file.createGroup("/", "newton", "Newton school") |  | ||||||
| table = h5file.createTable(group, "prep", PrepData, "Prep Data") |  | ||||||
|  |  | ||||||
| table.cols.timestamp.createIndex() |  | ||||||
|  |  | ||||||
| for i in range(0, 80): |  | ||||||
|     # Open file |  | ||||||
|     data = open("data/alldata") |  | ||||||
|     count = 0 |  | ||||||
|     oldtime = time.time() |  | ||||||
|     prep = table.row |  | ||||||
|     for line in data: |  | ||||||
|         count = count + 1 |  | ||||||
|         if count % 1000000 == 0: |  | ||||||
|             print str(i) + ": " + str((time.time() - oldtime)) + ", total " + str(count/1000000) + "m lines" |  | ||||||
|             oldtime = time.time() |  | ||||||
|         v = re.split('\s+', line) |  | ||||||
|         prep['timestamp'] = float(v[0]) / 1000.0 + 500000 * i |  | ||||||
|         prep['p1'] = v[1] |  | ||||||
|         prep['q1'] = v[2] |  | ||||||
|         prep['p3'] = v[3] |  | ||||||
|         prep['q3'] = v[4] |  | ||||||
|         prep['p5'] = v[5] |  | ||||||
|         prep['q5'] = v[6] |  | ||||||
|         prep['p7'] = v[7] |  | ||||||
|         prep['q7'] = v[8] |  | ||||||
|         prep.append() |  | ||||||
|     data.close() |  | ||||||
|  |  | ||||||
| h5file.close() |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
							
								
								
									
										41
									
								
								requirements.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								requirements.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,41 @@ | |||||||
|  | argcomplete==1.12.0 | ||||||
|  | CherryPy==18.6.0 | ||||||
|  | coverage==5.2.1 | ||||||
|  | Cython==0.29.21 | ||||||
|  | decorator==4.4.2 | ||||||
|  | fallocate==1.6.4 | ||||||
|  | flake8==3.8.3 | ||||||
|  | nose==1.3.7 | ||||||
|  | numpy==1.19.1 | ||||||
|  | progressbar==2.5 | ||||||
|  | psutil==5.7.2 | ||||||
|  | python-datetime-tz==0.5.4 | ||||||
|  | python-dateutil==2.8.1 | ||||||
|  | requests==2.24.0 | ||||||
|  | tz==0.2.2 | ||||||
|  | yappi==1.2.5 | ||||||
|  |  | ||||||
|  | ## The following requirements were added by pip freeze: | ||||||
|  | beautifulsoup4==4.9.1 | ||||||
|  | certifi==2020.6.20 | ||||||
|  | chardet==3.0.4 | ||||||
|  | cheroot==8.4.2 | ||||||
|  | idna==2.10 | ||||||
|  | jaraco.classes==3.1.0 | ||||||
|  | jaraco.collections==3.0.0 | ||||||
|  | jaraco.functools==3.0.1 | ||||||
|  | jaraco.text==3.2.0 | ||||||
|  | mccabe==0.6.1 | ||||||
|  | more-itertools==8.4.0 | ||||||
|  | portend==2.6 | ||||||
|  | pycodestyle==2.6.0 | ||||||
|  | pyflakes==2.2.0 | ||||||
|  | pytz==2020.1 | ||||||
|  | six==1.15.0 | ||||||
|  | soupsieve==2.0.1 | ||||||
|  | tempora==4.0.0 | ||||||
|  | urllib3==1.25.10 | ||||||
|  | waitress==1.4.4 | ||||||
|  | WebOb==1.8.6 | ||||||
|  | WebTest==2.0.35 | ||||||
|  | zc.lockfile==2.0 | ||||||
							
								
								
									
										50
									
								
								setup.cfg
									
									
									
									
									
								
							
							
						
						
									
										50
									
								
								setup.cfg
									
									
									
									
									
								
							| @@ -1,9 +1,59 @@ | |||||||
|  | [aliases] | ||||||
|  | test = nosetests | ||||||
|  |  | ||||||
| [nosetests] | [nosetests] | ||||||
|  | # Note: values must be set to 1, and have no comments on the same line, | ||||||
|  | # for "python setup.py nosetests" to work correctly. | ||||||
| nocapture=1 | nocapture=1 | ||||||
|  | # Comment this out to see CherryPy logs on failure: | ||||||
|  | nologcapture=1 | ||||||
| with-coverage=1 | with-coverage=1 | ||||||
| cover-inclusive=1 | cover-inclusive=1 | ||||||
| cover-package=nilmdb | cover-package=nilmdb | ||||||
| cover-erase=1 | cover-erase=1 | ||||||
|  | # this works, puts html output in cover/ dir: | ||||||
|  | # cover-html=1 | ||||||
|  | #debug=nose | ||||||
|  | #debug-log=nose.log | ||||||
| stop=1 | stop=1 | ||||||
| verbosity=2 | verbosity=2 | ||||||
|  | tests=tests | ||||||
|  | #tests=tests/test_threadsafety.py | ||||||
|  | #tests=tests/test_bulkdata.py | ||||||
|  | #tests=tests/test_mustclose.py | ||||||
|  | #tests=tests/test_lrucache.py | ||||||
|  | #tests=tests/test_cmdline.py | ||||||
|  | #tests=tests/test_layout.py | ||||||
|  | #tests=tests/test_rbtree.py | ||||||
|  | #tests=tests/test_interval.py | ||||||
|  | #tests=tests/test_rbtree.py,tests/test_interval.py | ||||||
|  | #tests=tests/test_interval.py | ||||||
|  | #tests=tests/test_client.py | ||||||
|  | #tests=tests/test_timestamper.py | ||||||
|  | #tests=tests/test_serializer.py | ||||||
|  | #tests=tests/test_iteratorizer.py | ||||||
|  | #tests=tests/test_client.py:TestClient.test_client_nilmdb | ||||||
|  | #tests=tests/test_nilmdb.py | ||||||
|  | #with-profile=1 | ||||||
|  | #profile-sort=time | ||||||
|  | ##profile-restrict=10  # doesn't work right, treated as string or something | ||||||
|  |  | ||||||
|  | [versioneer] | ||||||
|  | VCS=git | ||||||
|  | style=pep440 | ||||||
|  | versionfile_source=nilmdb/_version.py | ||||||
|  | versionfile_build=nilmdb/_version.py | ||||||
|  | tag_prefix=nilmdb- | ||||||
|  | parentdir_prefix=nilmdb- | ||||||
|  |  | ||||||
|  | [flake8] | ||||||
|  | exclude=_version.py | ||||||
|  | extend-ignore=E731 | ||||||
|  | per-file-ignores=__init__.py:F401,E402 \ | ||||||
|  |         serializer.py:E722 \ | ||||||
|  |         mustclose.py:E722 \ | ||||||
|  |         fsck.py:E266 | ||||||
|  |  | ||||||
|  | [pylint] | ||||||
|  | ignore=_version.py | ||||||
|  | disable=C0103,C0111,R0913,R0914 | ||||||
|   | |||||||
							
								
								
									
										67
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										67
									
								
								setup.py
									
									
									
									
									
								
							| @@ -1,9 +1,64 @@ | |||||||
| #!/usr/bin/python | #!/usr/bin/env python3 | ||||||
|  |  | ||||||
| from distutils.core import setup | # To release a new version, tag it: | ||||||
|  | #   git tag -a nilmdb-1.1 -m "Version 1.1" | ||||||
|  | #   git push --tags | ||||||
|  | # Then just package it up: | ||||||
|  | #   python3 setup.py sdist | ||||||
|  |  | ||||||
| setup(name = 'nilmdb', | import sys | ||||||
|       version = '1.0', | import os | ||||||
|       scripts = [ 'bin/nilm-test.py' ], | from setuptools import setup | ||||||
|       packages = [ 'nilmdb' ], | from distutils.extension import Extension | ||||||
|  |  | ||||||
|  | # Versioneer manages version numbers from git tags. | ||||||
|  | # https://github.com/warner/python-versioneer | ||||||
|  | import versioneer | ||||||
|  |  | ||||||
|  | # External modules that need to be built | ||||||
|  | ext_modules = [ Extension('nilmdb.server.rocket', ['nilmdb/server/rocket.c' ]) ] | ||||||
|  |  | ||||||
|  | # Use Cython. | ||||||
|  | cython_modules = [ 'nilmdb.server.interval', 'nilmdb.server.rbtree' ] | ||||||
|  | import Cython | ||||||
|  | from Cython.Build import cythonize | ||||||
|  | for modulename in cython_modules: | ||||||
|  |     filename = modulename.replace('.','/') | ||||||
|  |     ext_modules.extend(cythonize(filename + ".pyx")) | ||||||
|  |  | ||||||
|  | # Get list of requirements to use in `install_requires` below.  Note | ||||||
|  | # that we don't make a distinction between things that are actually | ||||||
|  | # required for end-users vs developers (or use `tests_require` or | ||||||
|  | # anything else) -- just install everything for simplicity. | ||||||
|  | install_requires = open('requirements.txt').readlines() | ||||||
|  |  | ||||||
|  | # Run setup | ||||||
|  | setup(name='nilmdb', | ||||||
|  |       version = versioneer.get_version(), | ||||||
|  |       cmdclass = versioneer.get_cmdclass(), | ||||||
|  |       url = 'https://git.jim.sh/nilm/nilmdb.git', | ||||||
|  |       author = 'Jim Paris', | ||||||
|  |       description = "NILM Database", | ||||||
|  |       long_description = "NILM Database", | ||||||
|  |       license = "Proprietary", | ||||||
|  |       author_email = 'jim@jtan.com', | ||||||
|  |       setup_requires = [ 'setuptools' ], | ||||||
|  |       install_requires = install_requires, | ||||||
|  |       packages = [ 'nilmdb', | ||||||
|  |                    'nilmdb.utils', | ||||||
|  |                    'nilmdb.server', | ||||||
|  |                    'nilmdb.client', | ||||||
|  |                    'nilmdb.cmdline', | ||||||
|  |                    'nilmdb.scripts', | ||||||
|  |                    'nilmdb.fsck', | ||||||
|  |                    ], | ||||||
|  |       entry_points = { | ||||||
|  |           'console_scripts': [ | ||||||
|  |               'nilmtool = nilmdb.scripts.nilmtool:main', | ||||||
|  |               'nilmdb-server = nilmdb.scripts.nilmdb_server:main', | ||||||
|  |               'nilmdb-fsck = nilmdb.scripts.nilmdb_fsck:main', | ||||||
|  |               ], | ||||||
|  |           }, | ||||||
|  |       ext_modules = ext_modules, | ||||||
|  |       zip_safe = False, | ||||||
|       ) |       ) | ||||||
|   | |||||||
| @@ -1,5 +0,0 @@ | |||||||
| all: |  | ||||||
| 	python speed-readascii.py |  | ||||||
|  |  | ||||||
| clean: |  | ||||||
| 	rm -f *pyc |  | ||||||
| @@ -1,4 +0,0 @@ | |||||||
| from __future__ import print_function |  | ||||||
| def printf(str, *args): |  | ||||||
|     print(str % args, end='') |  | ||||||
|      |  | ||||||
| @@ -1,67 +0,0 @@ | |||||||
| #!/usr/bin/python |  | ||||||
|  |  | ||||||
| from printf import printf |  | ||||||
| import time |  | ||||||
| import re |  | ||||||
| import numpy as np |  | ||||||
| import itertools |  | ||||||
|  |  | ||||||
| class Timer(): |  | ||||||
|     def __init__(self, arg): |  | ||||||
|         self.arg = arg |  | ||||||
|     def __enter__(self): self.start = time.time() |  | ||||||
|     def __exit__(self, *args): printf("%s: %f lines/sec\n", self.arg, 1e6 / (time.time() - self.start)) |  | ||||||
|  |  | ||||||
| def test_split(): |  | ||||||
|     for n, line in enumerate(open('1m.raw', 'r')): |  | ||||||
|         out = [0]*6 |  | ||||||
|         tmp = [ int(i) for i in line.partition('#')[0].split() ] |  | ||||||
|         out[0:len(tmp)] = tmp |  | ||||||
|         if (n % 100000 == 0): |  | ||||||
|             printf("line %d = %s\n", n, str(out)) |  | ||||||
|  |  | ||||||
| def test_split2(): |  | ||||||
|     for n, line in enumerate(open('1m.raw', 'r')): |  | ||||||
|         out = [0]*6 |  | ||||||
|         tmp = [ int(i,10) for i in line.partition('#')[0].split() ] |  | ||||||
|         out[0:len(tmp)] = tmp |  | ||||||
|         if (n % 100000 == 0): |  | ||||||
|             printf("line %d = %s\n", n, str(out)) |  | ||||||
|  |  | ||||||
| def test_regex(): |  | ||||||
|     for n, line in enumerate(open('1m.raw', 'r')): |  | ||||||
|         out = [0]*6 |  | ||||||
|         tmp = [ int(x) for x in re.findall('(\d+)\s+',line.partition('#')[0]) ] |  | ||||||
|         out[0:len(tmp)] = tmp |  | ||||||
|         if (n % 100000 == 0): |  | ||||||
|             printf("line %d = %s\n", n, str(out)) |  | ||||||
|  |  | ||||||
| def test_bigregex(): |  | ||||||
|     regex = re.compile('^(?:\s*)' + '(?:(\d+)\s+)?' * 6) |  | ||||||
|     for n, line in enumerate(open('1m.raw', 'r')): |  | ||||||
|         out = [ int(x or 0) for x in re.match(regex, line).groups() ] |  | ||||||
|         if (n % 100000 == 0): |  | ||||||
|             printf("line %d = %s\n", n, str(out)) |  | ||||||
|  |  | ||||||
| def test_numpy(): |  | ||||||
|     out = np.genfromtxt(open('1m.raw', 'r'), |  | ||||||
|                         dtype = np.dtype('i2,i2,i2,i2,i2,i2')) |  | ||||||
|      |  | ||||||
| with Timer("numpy"): |  | ||||||
|     test_numpy() # 106k/sec |  | ||||||
|  |  | ||||||
| with Timer("regex"): |  | ||||||
|     test_regex() # 121k/sec |  | ||||||
|  |  | ||||||
| with Timer("split"): |  | ||||||
|     test_split() # 219k/sec |  | ||||||
|  |  | ||||||
| with Timer("split2"): |  | ||||||
|     test_split2() # 328k/sec |  | ||||||
|  |  | ||||||
| with Timer("bigregex"): |  | ||||||
|     test_bigregex() # 130k/sec |  | ||||||
|  |  | ||||||
| # The "int" operation takes quite a while -- int(x,10) is twice as fast |  | ||||||
| # Perl does about 500k/sec |  | ||||||
|  |  | ||||||
| @@ -1,74 +0,0 @@ | |||||||
| #!/usr/bin/python |  | ||||||
|  |  | ||||||
| from printf import printf |  | ||||||
| import time |  | ||||||
| import re |  | ||||||
| import numpy as np |  | ||||||
| import itertools |  | ||||||
| import struct |  | ||||||
| import array |  | ||||||
| import os |  | ||||||
| import mmap |  | ||||||
|  |  | ||||||
| class Timer(): |  | ||||||
|     def __init__(self, arg): |  | ||||||
|         self.arg = arg |  | ||||||
|     def __enter__(self): self.start = time.time() |  | ||||||
|     def __exit__(self, *args): printf("%s: %f klines/sec\n", self.arg, 1e3 / (time.time() - self.start)) |  | ||||||
|  |  | ||||||
| def test_struct1(): |  | ||||||
|     """read with struct.unpack""" |  | ||||||
|     f = open('1m.bin', 'rb') |  | ||||||
|     f.seek(0,os.SEEK_END) |  | ||||||
|     filesize = f.tell() |  | ||||||
|     f.seek(0,os.SEEK_SET) |  | ||||||
|     packer = struct.Struct('!dHHHHHH') |  | ||||||
|     items = filesize / packer.size |  | ||||||
|     for n in xrange(items): |  | ||||||
|         s = f.read(packer.size) |  | ||||||
|         out = packer.unpack(s) |  | ||||||
|         if (n % 100000 == 0): |  | ||||||
|             printf("line %d = %s\n", n, str(out)) |  | ||||||
|  |  | ||||||
| def test_struct2(): |  | ||||||
|     """read with struct.unpack, convert to string""" |  | ||||||
|     f = open('1m.bin', 'rb') |  | ||||||
|     f.seek(0,os.SEEK_END) |  | ||||||
|     filesize = f.tell() |  | ||||||
|     f.seek(0,os.SEEK_SET) |  | ||||||
|     packer = struct.Struct('!dHHHHHH') |  | ||||||
|     items = filesize / packer.size |  | ||||||
|     for n in xrange(items): |  | ||||||
|         s = f.read(packer.size) |  | ||||||
|         out = packer.unpack(s) |  | ||||||
|         x = str(out) |  | ||||||
|         if (n % 100000 == 0): |  | ||||||
|             printf("line %d = %s\n", n, str(out)) |  | ||||||
|  |  | ||||||
| def test_mmap(): |  | ||||||
|     """struct.unpack with mmap""" |  | ||||||
|     with open('1m.bin', 'rb') as f: |  | ||||||
|         f.seek(0,os.SEEK_END) |  | ||||||
|         filesize = f.tell() |  | ||||||
|         f.seek(0,os.SEEK_SET) |  | ||||||
|         m = mmap.mmap(f.fileno(), filesize, access=mmap.ACCESS_READ) |  | ||||||
|         packer = struct.Struct('!dHHHHHH') |  | ||||||
|         items = filesize / packer.size |  | ||||||
|         for n in xrange(items): |  | ||||||
|             out = packer.unpack(m[packer.size*n : packer.size*(n+1)]) |  | ||||||
|             if (n % 100000 == 0): |  | ||||||
|                 printf("line %d = %s\n", n, str(out)) |  | ||||||
|  |  | ||||||
| with Timer("mmap"): |  | ||||||
|     test_mmap()  # 1600k |  | ||||||
|  |  | ||||||
| with Timer("struct1"): |  | ||||||
|     test_struct1()  # 1460k |  | ||||||
|  |  | ||||||
| with Timer("struct2"): |  | ||||||
|     test_struct2()  # 210k |  | ||||||
|  |  | ||||||
| # Reading from the file is again much quicker than converting to string |  | ||||||
| # Use mmap, it's good |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -1,76 +0,0 @@ | |||||||
| #!/usr/bin/python |  | ||||||
|  |  | ||||||
| from printf import printf |  | ||||||
| import time |  | ||||||
| import re |  | ||||||
| import numpy as np |  | ||||||
| import itertools |  | ||||||
| import struct |  | ||||||
| import array |  | ||||||
|  |  | ||||||
| class Timer(): |  | ||||||
|     def __init__(self, arg): |  | ||||||
|         self.arg = arg |  | ||||||
|     def __enter__(self): self.start = time.time() |  | ||||||
|     def __exit__(self, *args): printf("%s: %f klines/sec\n", self.arg, 1e3 / (time.time() - self.start)) |  | ||||||
|  |  | ||||||
| def read_ascii(): |  | ||||||
|     for n in xrange(1000000): |  | ||||||
|         yield (1234, 2345, 3456, 4576, 5678, 6789) |  | ||||||
| #    for n, line in enumerate(open('1m.raw', 'r')): |  | ||||||
| #        out = [0]*6 |  | ||||||
| #        tmp = [ int(i,10) for i in line.partition('#')[0].split() ] |  | ||||||
| #        out[0:len(tmp)] = tmp |  | ||||||
| #        if (n % 100000 == 0): |  | ||||||
| #            printf("line %d = %s\n", n, str(out)) |  | ||||||
| #        yield out |  | ||||||
|  |  | ||||||
| def test_struct1(): |  | ||||||
|     """write with struct.pack""" |  | ||||||
|     f = open('1m.bin', 'wb') |  | ||||||
|     for out in read_ascii(): |  | ||||||
|         s = struct.pack('!HHHHHH', *out) |  | ||||||
|         f.write(s) |  | ||||||
|  |  | ||||||
| def test_struct2(): |  | ||||||
|     """use constant format string""" |  | ||||||
|     f = open('1m.bin', 'wb') |  | ||||||
|     packer = struct.Struct('!HHHHHH') |  | ||||||
|     for out in read_ascii(): |  | ||||||
|         f.write(packer.pack(*out)) |  | ||||||
|     f.close() |  | ||||||
|     printf("size was %d\n", packer.size) |  | ||||||
|  |  | ||||||
| def test_struct3(): |  | ||||||
|     """like struct1, with timestamp""" |  | ||||||
|     f = open('1m.bin', 'wb') |  | ||||||
|     for out in read_ascii(): |  | ||||||
|         s = struct.pack('!dHHHHHH', time.time(), *out) |  | ||||||
|         f.write(s) |  | ||||||
|  |  | ||||||
| def test_struct4(): |  | ||||||
|     """like struct2, with timestamp""" |  | ||||||
|     f = open('1m.bin', 'wb') |  | ||||||
|     packer = struct.Struct('!dHHHHHH') |  | ||||||
|     for out in read_ascii(): |  | ||||||
|         f.write(packer.pack(time.time(), *out)) |  | ||||||
|     f.close() |  | ||||||
|     printf("size was %d\n", packer.size) |  | ||||||
|  |  | ||||||
| #raise Exception('done') |  | ||||||
|  |  | ||||||
| with Timer("struct1"): |  | ||||||
|     test_struct1() # 1089k |  | ||||||
|  |  | ||||||
| with Timer("struct2"): |  | ||||||
|     test_struct2() # 1249k |  | ||||||
|  |  | ||||||
| with Timer("struct3"): |  | ||||||
|     test_struct3() # 845k |  | ||||||
|  |  | ||||||
| with Timer("struct4"): |  | ||||||
|     test_struct4() # 922k |  | ||||||
|  |  | ||||||
| # This seems fast enough for writing new data, since it's faster than |  | ||||||
| # we read ascii data anyway.  Use e.g. struct4 |  | ||||||
|  |  | ||||||
| @@ -1,11 +0,0 @@ | |||||||
| #!/usr/bin/python |  | ||||||
|  |  | ||||||
| import struct |  | ||||||
| import mmap |  | ||||||
|  |  | ||||||
| f = open("test.dat", "rb+") |  | ||||||
| mm = mmap.mmap(f.fileno(),3) |  | ||||||
|  |  | ||||||
| print len(mm) |  | ||||||
| print "first 3 bytes: " + mm[0:3]; |  | ||||||
|  |  | ||||||
| @@ -1 +0,0 @@ | |||||||
|  |  | ||||||
| @@ -1,7 +0,0 @@ | |||||||
| - Have a class representing the file contents |  | ||||||
| - Looks like an array |  | ||||||
|   - len(), get(), index |  | ||||||
|   - some form of bisect search |  | ||||||
|   - get_extents = return [0].timestamp, [-1].timestamp |  | ||||||
|   -  |  | ||||||
| - Can append?  Sure, why not.  Just write to the file, extend mmap accordingly. |  | ||||||
							
								
								
									
										124
									
								
								tests/data/extract-1
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										124
									
								
								tests/data/extract-1
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,124 @@ | |||||||
|  | # path: /newton/prep | ||||||
|  | # layout: float32_8 | ||||||
|  | # start: Fri, 23 Mar 2012 10:00:30.000000 +0000 | ||||||
|  | # end: Fri, 23 Mar 2012 10:00:31.000000 +0000 | ||||||
|  | 1332496830000000 2.517740e+05 2.242410e+05 5.688100e+03 1.915530e+03 9.329220e+03 4.183710e+03 1.212350e+03 2.641790e+03 | ||||||
|  | 1332496830008333 2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03 | ||||||
|  | 1332496830016667 2.630730e+05 2.233040e+05 4.961640e+03 2.197120e+03 7.687310e+03 4.861860e+03 2.732780e+03 3.008540e+03 | ||||||
|  | 1332496830025000 2.576140e+05 2.233230e+05 5.003660e+03 3.525140e+03 7.165310e+03 4.685620e+03 1.715380e+03 3.440480e+03 | ||||||
|  | 1332496830033333 2.557800e+05 2.219150e+05 6.357310e+03 2.145290e+03 8.426970e+03 3.775350e+03 1.475390e+03 3.797240e+03 | ||||||
|  | 1332496830041667 2.601660e+05 2.230080e+05 6.702590e+03 1.484960e+03 9.288100e+03 3.330830e+03 1.228500e+03 3.214320e+03 | ||||||
|  | 1332496830050000 2.612310e+05 2.264260e+05 4.980060e+03 2.982380e+03 8.499630e+03 4.267670e+03 9.940890e+02 2.292890e+03 | ||||||
|  | 1332496830058333 2.551170e+05 2.266420e+05 4.584410e+03 4.656440e+03 7.860150e+03 5.317310e+03 1.473600e+03 2.111690e+03 | ||||||
|  | 1332496830066667 2.533000e+05 2.235540e+05 6.455090e+03 3.036650e+03 8.869750e+03 4.986310e+03 2.607360e+03 2.839590e+03 | ||||||
|  | 1332496830075000 2.610610e+05 2.212630e+05 6.951980e+03 1.500240e+03 9.386100e+03 3.791680e+03 2.677010e+03 3.980630e+03 | ||||||
|  | 1332496830083333 2.665030e+05 2.231980e+05 5.189610e+03 2.594560e+03 8.571530e+03 3.175000e+03 9.198400e+02 3.792010e+03 | ||||||
|  | 1332496830091667 2.606920e+05 2.251840e+05 3.782480e+03 4.642880e+03 7.662960e+03 3.917790e+03 -2.510970e+02 2.907060e+03 | ||||||
|  | 1332496830100000 2.539630e+05 2.250810e+05 5.123530e+03 3.839550e+03 8.669030e+03 4.877820e+03 9.437240e+02 2.527450e+03 | ||||||
|  | 1332496830108333 2.565550e+05 2.241690e+05 5.930600e+03 2.298540e+03 8.906710e+03 5.331680e+03 2.549910e+03 3.053560e+03 | ||||||
|  | 1332496830116667 2.608890e+05 2.250100e+05 4.681130e+03 2.971870e+03 7.900040e+03 4.874080e+03 2.322430e+03 3.649120e+03 | ||||||
|  | 1332496830125000 2.579440e+05 2.249230e+05 3.291140e+03 4.357090e+03 7.131590e+03 4.385560e+03 1.077050e+03 3.664040e+03 | ||||||
|  | 1332496830133333 2.550090e+05 2.230180e+05 4.584820e+03 2.864000e+03 8.469490e+03 3.625580e+03 9.855570e+02 3.504230e+03 | ||||||
|  | 1332496830141667 2.601140e+05 2.219470e+05 5.676190e+03 1.210340e+03 9.393780e+03 3.390240e+03 1.654020e+03 3.018700e+03 | ||||||
|  | 1332496830150000 2.642770e+05 2.244380e+05 4.446620e+03 2.176720e+03 8.142090e+03 4.584880e+03 2.327830e+03 2.615800e+03 | ||||||
|  | 1332496830158333 2.592210e+05 2.264710e+05 2.734440e+03 4.182760e+03 6.389550e+03 5.540520e+03 1.958880e+03 2.720120e+03 | ||||||
|  | 1332496830166667 2.526500e+05 2.248310e+05 4.163640e+03 2.989990e+03 7.179200e+03 5.213060e+03 1.929550e+03 3.457660e+03 | ||||||
|  | 1332496830175000 2.570830e+05 2.220480e+05 5.759040e+03 7.024410e+02 8.566550e+03 3.552020e+03 1.832940e+03 3.956190e+03 | ||||||
|  | 1332496830183333 2.631300e+05 2.229670e+05 5.141140e+03 1.166120e+03 8.666960e+03 2.720370e+03 9.713740e+02 3.479730e+03 | ||||||
|  | 1332496830191667 2.602360e+05 2.252650e+05 3.425140e+03 3.339080e+03 7.853610e+03 3.674950e+03 5.259080e+02 2.443310e+03 | ||||||
|  | 1332496830200000 2.535030e+05 2.245270e+05 4.398130e+03 2.927430e+03 8.110280e+03 4.842470e+03 1.513870e+03 2.467100e+03 | ||||||
|  | 1332496830208333 2.561260e+05 2.226930e+05 6.043530e+03 6.562240e+02 8.797560e+03 4.832410e+03 2.832370e+03 3.426140e+03 | ||||||
|  | 1332496830216667 2.616770e+05 2.236080e+05 5.830460e+03 1.033910e+03 8.123940e+03 3.980690e+03 1.927960e+03 4.092720e+03 | ||||||
|  | 1332496830225000 2.594570e+05 2.255360e+05 4.015570e+03 2.995990e+03 7.135440e+03 3.713550e+03 3.072200e+02 3.849430e+03 | ||||||
|  | 1332496830233333 2.533520e+05 2.242160e+05 4.650560e+03 3.196620e+03 8.131280e+03 3.586160e+03 7.083230e+01 3.074180e+03 | ||||||
|  | 1332496830241667 2.561240e+05 2.215130e+05 6.100480e+03 8.219800e+02 9.757540e+03 3.474510e+03 1.647520e+03 2.559860e+03 | ||||||
|  | 1332496830250000 2.630240e+05 2.215590e+05 5.789960e+03 6.994170e+02 9.129740e+03 4.153080e+03 2.829250e+03 2.677270e+03 | ||||||
|  | 1332496830258333 2.617200e+05 2.240150e+05 4.358500e+03 2.645360e+03 7.414110e+03 4.810670e+03 2.225990e+03 3.185990e+03 | ||||||
|  | 1332496830266667 2.547560e+05 2.242400e+05 4.857380e+03 3.229680e+03 7.539310e+03 4.769140e+03 1.507130e+03 3.668260e+03 | ||||||
|  | 1332496830275000 2.568890e+05 2.226580e+05 6.473420e+03 1.214110e+03 9.010760e+03 3.848730e+03 1.303840e+03 3.778500e+03 | ||||||
|  | 1332496830283333 2.642080e+05 2.233160e+05 5.700450e+03 1.116560e+03 9.087610e+03 3.846680e+03 1.293590e+03 2.891560e+03 | ||||||
|  | 1332496830291667 2.633100e+05 2.257190e+05 3.936120e+03 3.252360e+03 7.552850e+03 4.897860e+03 1.156630e+03 2.037160e+03 | ||||||
|  | 1332496830300000 2.550790e+05 2.250860e+05 4.536450e+03 3.960110e+03 7.454590e+03 5.479070e+03 1.596360e+03 2.190800e+03 | ||||||
|  | 1332496830308333 2.544870e+05 2.225080e+05 6.635860e+03 1.758850e+03 8.732970e+03 4.466970e+03 2.650360e+03 3.139310e+03 | ||||||
|  | 1332496830316667 2.612410e+05 2.224320e+05 6.702270e+03 1.085130e+03 8.989230e+03 3.112990e+03 1.933560e+03 3.828410e+03 | ||||||
|  | 1332496830325000 2.621190e+05 2.255870e+05 4.714950e+03 2.892360e+03 8.107820e+03 2.961310e+03 2.399780e+02 3.273720e+03 | ||||||
|  | 1332496830333333 2.549990e+05 2.265140e+05 4.532090e+03 4.126900e+03 8.200130e+03 3.872590e+03 5.608900e+01 2.370580e+03 | ||||||
|  | 1332496830341667 2.542890e+05 2.240330e+05 6.538810e+03 2.251440e+03 9.419430e+03 4.564450e+03 2.077810e+03 2.508170e+03 | ||||||
|  | 1332496830350000 2.618900e+05 2.219600e+05 6.846090e+03 1.475270e+03 9.125590e+03 4.598290e+03 3.299220e+03 3.475420e+03 | ||||||
|  | 1332496830358333 2.645020e+05 2.230850e+05 5.066380e+03 3.270560e+03 7.933170e+03 4.173710e+03 1.908910e+03 3.867460e+03 | ||||||
|  | 1332496830366667 2.578890e+05 2.236560e+05 4.201660e+03 4.473640e+03 7.688340e+03 4.161580e+03 6.875790e+02 3.653690e+03 | ||||||
|  | 1332496830375000 2.542700e+05 2.231510e+05 5.715140e+03 2.752140e+03 9.273320e+03 3.772950e+03 8.964040e+02 3.256060e+03 | ||||||
|  | 1332496830383333 2.582570e+05 2.242170e+05 6.114310e+03 1.856860e+03 9.604320e+03 4.200490e+03 1.764380e+03 2.939220e+03 | ||||||
|  | 1332496830391667 2.600200e+05 2.268680e+05 4.237530e+03 3.605880e+03 8.066220e+03 5.430250e+03 2.138580e+03 2.696710e+03 | ||||||
|  | 1332496830400000 2.550830e+05 2.259240e+05 3.350310e+03 4.853070e+03 7.045820e+03 5.925200e+03 1.893610e+03 2.897340e+03 | ||||||
|  | 1332496830408333 2.544530e+05 2.221270e+05 5.271330e+03 2.491500e+03 8.436680e+03 5.032080e+03 2.436050e+03 3.724590e+03 | ||||||
|  | 1332496830416667 2.625880e+05 2.199500e+05 5.994620e+03 7.892740e+02 9.029650e+03 3.515740e+03 1.953570e+03 4.014520e+03 | ||||||
|  | 1332496830425000 2.656100e+05 2.233330e+05 4.391410e+03 2.400960e+03 8.146460e+03 3.536960e+03 5.302320e+02 3.133920e+03 | ||||||
|  | 1332496830433333 2.574700e+05 2.269770e+05 2.975320e+03 4.633530e+03 7.278560e+03 4.640100e+03 -5.015020e+01 2.024960e+03 | ||||||
|  | 1332496830441667 2.506870e+05 2.263310e+05 4.517860e+03 3.183800e+03 8.072600e+03 5.281660e+03 1.605140e+03 2.335140e+03 | ||||||
|  | 1332496830450000 2.555630e+05 2.244950e+05 5.551000e+03 1.101300e+03 8.461490e+03 4.725700e+03 2.726670e+03 3.480540e+03 | ||||||
|  | 1332496830458333 2.613350e+05 2.246450e+05 4.764680e+03 1.557020e+03 7.833350e+03 3.524810e+03 1.577410e+03 4.038620e+03 | ||||||
|  | 1332496830466667 2.602690e+05 2.240080e+05 3.558030e+03 2.987610e+03 7.362440e+03 3.279230e+03 5.624420e+02 3.786550e+03 | ||||||
|  | 1332496830475000 2.574350e+05 2.217770e+05 4.972600e+03 2.166880e+03 8.481440e+03 3.328720e+03 1.037130e+03 3.271370e+03 | ||||||
|  | 1332496830483333 2.610460e+05 2.215500e+05 5.816180e+03 5.902170e+02 9.120930e+03 3.895400e+03 2.382670e+03 2.824170e+03 | ||||||
|  | 1332496830491667 2.627660e+05 2.244730e+05 4.835050e+03 1.785770e+03 7.880760e+03 4.745620e+03 2.443660e+03 3.229550e+03 | ||||||
|  | 1332496830500000 2.565090e+05 2.264130e+05 3.758870e+03 3.461200e+03 6.743770e+03 4.928960e+03 1.536620e+03 3.546690e+03 | ||||||
|  | 1332496830508333 2.507930e+05 2.243720e+05 5.218490e+03 2.865260e+03 7.803960e+03 4.351090e+03 1.333820e+03 3.680490e+03 | ||||||
|  | 1332496830516667 2.563190e+05 2.220660e+05 6.403970e+03 7.323450e+02 9.627760e+03 3.089300e+03 1.516780e+03 3.653690e+03 | ||||||
|  | 1332496830525000 2.633430e+05 2.232350e+05 5.200430e+03 1.388580e+03 9.372850e+03 3.371230e+03 1.450390e+03 2.678910e+03 | ||||||
|  | 1332496830533333 2.609030e+05 2.251100e+05 3.722580e+03 3.246660e+03 7.876540e+03 4.716810e+03 1.498440e+03 2.116520e+03 | ||||||
|  | 1332496830541667 2.544160e+05 2.237690e+05 4.841650e+03 2.956400e+03 8.115920e+03 5.392360e+03 2.142810e+03 2.652320e+03 | ||||||
|  | 1332496830550000 2.566980e+05 2.221720e+05 6.471230e+03 9.703960e+02 8.834980e+03 4.816840e+03 2.376630e+03 3.605860e+03 | ||||||
|  | 1332496830558333 2.618410e+05 2.235370e+05 5.500740e+03 1.189660e+03 8.365730e+03 4.016470e+03 1.042270e+03 3.821200e+03 | ||||||
|  | 1332496830566667 2.595030e+05 2.258400e+05 3.827930e+03 3.088840e+03 7.676140e+03 3.978310e+03 -3.570070e+02 3.016420e+03 | ||||||
|  | 1332496830575000 2.534570e+05 2.246360e+05 4.914610e+03 3.097450e+03 8.224900e+03 4.321440e+03 1.713740e+02 2.412360e+03 | ||||||
|  | 1332496830583333 2.560290e+05 2.222210e+05 6.841800e+03 1.028500e+03 9.252300e+03 4.387570e+03 2.418140e+03 2.510100e+03 | ||||||
|  | 1332496830591667 2.628400e+05 2.225500e+05 6.210250e+03 1.410730e+03 8.538900e+03 4.152580e+03 3.009300e+03 3.219760e+03 | ||||||
|  | 1332496830600000 2.616330e+05 2.250650e+05 4.284530e+03 3.357210e+03 7.282170e+03 3.823590e+03 1.402840e+03 3.644670e+03 | ||||||
|  | 1332496830608333 2.545910e+05 2.251090e+05 4.693160e+03 3.647740e+03 7.745160e+03 3.686380e+03 4.901610e+02 3.448860e+03 | ||||||
|  | 1332496830616667 2.547800e+05 2.235990e+05 6.527380e+03 1.569870e+03 9.438430e+03 3.456580e+03 1.162520e+03 3.252010e+03 | ||||||
|  | 1332496830625000 2.606390e+05 2.241070e+05 6.531050e+03 1.633050e+03 9.283720e+03 4.174020e+03 2.089550e+03 2.775750e+03 | ||||||
|  | 1332496830633333 2.611080e+05 2.254720e+05 4.968260e+03 3.527850e+03 7.692870e+03 5.137100e+03 2.207390e+03 2.436660e+03 | ||||||
|  | 1332496830641667 2.557750e+05 2.237080e+05 4.963450e+03 4.017370e+03 7.701420e+03 5.269650e+03 2.284400e+03 2.842080e+03 | ||||||
|  | 1332496830650000 2.573980e+05 2.209470e+05 6.767500e+03 1.645710e+03 9.107070e+03 4.000180e+03 2.548860e+03 3.624770e+03 | ||||||
|  | 1332496830658333 2.649240e+05 2.215590e+05 6.471460e+03 1.110330e+03 9.459650e+03 3.108170e+03 1.696970e+03 3.893440e+03 | ||||||
|  | 1332496830666667 2.653390e+05 2.257330e+05 4.348800e+03 3.459510e+03 8.475300e+03 4.031240e+03 5.733470e+02 2.910270e+03 | ||||||
|  | 1332496830675000 2.568140e+05 2.269950e+05 3.479540e+03 4.949790e+03 7.499910e+03 5.624710e+03 7.516560e+02 2.347710e+03 | ||||||
|  | 1332496830683333 2.533160e+05 2.251610e+05 5.147060e+03 3.218430e+03 8.460160e+03 5.869300e+03 2.336320e+03 2.987960e+03 | ||||||
|  | 1332496830691667 2.593600e+05 2.231010e+05 5.549120e+03 1.869950e+03 8.740760e+03 4.668940e+03 2.457910e+03 3.758820e+03 | ||||||
|  | 1332496830700000 2.620120e+05 2.240160e+05 4.173610e+03 3.004130e+03 8.157040e+03 3.704730e+03 9.879640e+02 3.652750e+03 | ||||||
|  | 1332496830708333 2.571760e+05 2.244200e+05 3.517300e+03 4.118750e+03 7.822240e+03 3.718230e+03 3.726490e+01 2.953680e+03 | ||||||
|  | 1332496830716667 2.551460e+05 2.233220e+05 4.923980e+03 2.330680e+03 9.095910e+03 3.792400e+03 1.013070e+03 2.711240e+03 | ||||||
|  | 1332496830725000 2.605240e+05 2.236510e+05 5.413630e+03 1.146210e+03 8.817170e+03 4.419650e+03 2.446650e+03 2.832050e+03 | ||||||
|  | 1332496830733333 2.620980e+05 2.257520e+05 4.262980e+03 2.270970e+03 7.135480e+03 5.067120e+03 2.294680e+03 3.376620e+03 | ||||||
|  | 1332496830741667 2.568890e+05 2.253790e+05 3.606460e+03 3.568190e+03 6.552650e+03 4.970270e+03 1.516380e+03 3.662570e+03 | ||||||
|  | 1332496830750000 2.539480e+05 2.226310e+05 5.511700e+03 2.066300e+03 7.952660e+03 4.019910e+03 1.513140e+03 3.752630e+03 | ||||||
|  | 1332496830758333 2.597990e+05 2.220670e+05 5.873500e+03 6.085840e+02 9.253780e+03 2.870740e+03 1.348240e+03 3.344200e+03 | ||||||
|  | 1332496830766667 2.625470e+05 2.249010e+05 4.346080e+03 1.928100e+03 8.590970e+03 3.455460e+03 9.043910e+02 2.379270e+03 | ||||||
|  | 1332496830775000 2.561370e+05 2.267610e+05 3.423560e+03 3.379080e+03 7.471150e+03 4.894170e+03 1.153540e+03 2.031410e+03 | ||||||
|  | 1332496830783333 2.503260e+05 2.250130e+05 5.519980e+03 2.423970e+03 7.991760e+03 5.117950e+03 2.098790e+03 3.099240e+03 | ||||||
|  | 1332496830791667 2.554540e+05 2.229920e+05 6.547950e+03 4.964960e+02 8.751340e+03 3.900560e+03 2.132290e+03 4.076810e+03 | ||||||
|  | 1332496830800000 2.612860e+05 2.234890e+05 5.152850e+03 1.501510e+03 8.425610e+03 2.888030e+03 7.761140e+02 3.786360e+03 | ||||||
|  | 1332496830808333 2.589690e+05 2.240690e+05 3.832610e+03 3.001980e+03 7.979260e+03 3.182310e+03 5.271600e+01 2.874800e+03 | ||||||
|  | 1332496830816667 2.549460e+05 2.220350e+05 5.317880e+03 2.139800e+03 9.103140e+03 3.955610e+03 1.235170e+03 2.394150e+03 | ||||||
|  | 1332496830825000 2.586760e+05 2.212050e+05 6.594910e+03 5.053440e+02 9.423360e+03 4.562470e+03 2.913740e+03 2.892350e+03 | ||||||
|  | 1332496830833333 2.621250e+05 2.235660e+05 5.116750e+03 1.773600e+03 8.082200e+03 4.776370e+03 2.386390e+03 3.659730e+03 | ||||||
|  | 1332496830841667 2.578350e+05 2.259180e+05 3.714300e+03 3.477080e+03 7.205370e+03 4.554610e+03 7.115390e+02 3.878420e+03 | ||||||
|  | 1332496830850000 2.536600e+05 2.243710e+05 5.022450e+03 2.592430e+03 8.277200e+03 4.119370e+03 4.865080e+02 3.666740e+03 | ||||||
|  | 1332496830858333 2.595030e+05 2.220610e+05 6.589950e+03 6.599360e+02 9.596920e+03 3.598100e+03 1.702490e+03 3.036600e+03 | ||||||
|  | 1332496830866667 2.654950e+05 2.228430e+05 5.541850e+03 1.728430e+03 8.459960e+03 4.492000e+03 2.231970e+03 2.430620e+03 | ||||||
|  | 1332496830875000 2.609290e+05 2.249960e+05 4.000950e+03 3.745990e+03 6.983790e+03 5.430860e+03 1.855260e+03 2.533380e+03 | ||||||
|  | 1332496830883333 2.527160e+05 2.243350e+05 5.086560e+03 3.401150e+03 7.597970e+03 5.196120e+03 1.755720e+03 3.079760e+03 | ||||||
|  | 1332496830891667 2.541100e+05 2.231110e+05 6.822190e+03 1.229080e+03 9.164340e+03 3.761230e+03 1.679390e+03 3.584880e+03 | ||||||
|  | 1332496830900000 2.599690e+05 2.246930e+05 6.183950e+03 1.538500e+03 9.222080e+03 3.139170e+03 9.499020e+02 3.180800e+03 | ||||||
|  | 1332496830908333 2.590780e+05 2.269130e+05 4.388890e+03 3.694820e+03 8.195020e+03 3.933000e+03 4.260800e+02 2.388450e+03 | ||||||
|  | 1332496830916667 2.545630e+05 2.247600e+05 5.168440e+03 4.020940e+03 8.450270e+03 4.758910e+03 1.458900e+03 2.286430e+03 | ||||||
|  | 1332496830925000 2.580590e+05 2.212170e+05 6.883460e+03 1.649530e+03 9.232780e+03 4.457650e+03 3.057820e+03 3.031950e+03 | ||||||
|  | 1332496830933333 2.646670e+05 2.211770e+05 6.218510e+03 1.645730e+03 8.657180e+03 3.663500e+03 2.528280e+03 3.978340e+03 | ||||||
|  | 1332496830941667 2.629250e+05 2.243820e+05 4.627500e+03 3.635930e+03 7.892800e+03 3.431320e+03 6.045090e+02 3.901370e+03 | ||||||
|  | 1332496830950000 2.547080e+05 2.254480e+05 4.408250e+03 4.461040e+03 8.197170e+03 3.953750e+03 -4.453460e+01 3.154870e+03 | ||||||
|  | 1332496830958333 2.537020e+05 2.246350e+05 5.825770e+03 2.577050e+03 9.590050e+03 4.569250e+03 1.460270e+03 2.785170e+03 | ||||||
|  | 1332496830966667 2.602060e+05 2.241400e+05 5.387980e+03 1.951160e+03 8.789510e+03 5.131660e+03 2.706380e+03 2.972480e+03 | ||||||
|  | 1332496830975000 2.612400e+05 2.247370e+05 3.860810e+03 3.418310e+03 7.414530e+03 5.284520e+03 2.271380e+03 3.183150e+03 | ||||||
|  | 1332496830983333 2.561400e+05 2.232520e+05 3.850010e+03 3.957140e+03 7.262650e+03 4.964640e+03 1.499510e+03 3.453130e+03 | ||||||
|  | 1332496830991667 2.561160e+05 2.213490e+05 5.594480e+03 2.054400e+03 8.835130e+03 3.662010e+03 1.485510e+03 3.613010e+03 | ||||||
							
								
								
									
										119
									
								
								tests/data/extract-2
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										119
									
								
								tests/data/extract-2
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,119 @@ | |||||||
|  | 1332496830008333 2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03 | ||||||
|  | 1332496830016667 2.630730e+05 2.233040e+05 4.961640e+03 2.197120e+03 7.687310e+03 4.861860e+03 2.732780e+03 3.008540e+03 | ||||||
|  | 1332496830025000 2.576140e+05 2.233230e+05 5.003660e+03 3.525140e+03 7.165310e+03 4.685620e+03 1.715380e+03 3.440480e+03 | ||||||
|  | 1332496830033333 2.557800e+05 2.219150e+05 6.357310e+03 2.145290e+03 8.426970e+03 3.775350e+03 1.475390e+03 3.797240e+03 | ||||||
|  | 1332496830041667 2.601660e+05 2.230080e+05 6.702590e+03 1.484960e+03 9.288100e+03 3.330830e+03 1.228500e+03 3.214320e+03 | ||||||
|  | 1332496830050000 2.612310e+05 2.264260e+05 4.980060e+03 2.982380e+03 8.499630e+03 4.267670e+03 9.940890e+02 2.292890e+03 | ||||||
|  | 1332496830058333 2.551170e+05 2.266420e+05 4.584410e+03 4.656440e+03 7.860150e+03 5.317310e+03 1.473600e+03 2.111690e+03 | ||||||
|  | 1332496830066667 2.533000e+05 2.235540e+05 6.455090e+03 3.036650e+03 8.869750e+03 4.986310e+03 2.607360e+03 2.839590e+03 | ||||||
|  | 1332496830075000 2.610610e+05 2.212630e+05 6.951980e+03 1.500240e+03 9.386100e+03 3.791680e+03 2.677010e+03 3.980630e+03 | ||||||
|  | 1332496830083333 2.665030e+05 2.231980e+05 5.189610e+03 2.594560e+03 8.571530e+03 3.175000e+03 9.198400e+02 3.792010e+03 | ||||||
|  | 1332496830091667 2.606920e+05 2.251840e+05 3.782480e+03 4.642880e+03 7.662960e+03 3.917790e+03 -2.510970e+02 2.907060e+03 | ||||||
|  | 1332496830100000 2.539630e+05 2.250810e+05 5.123530e+03 3.839550e+03 8.669030e+03 4.877820e+03 9.437240e+02 2.527450e+03 | ||||||
|  | 1332496830108333 2.565550e+05 2.241690e+05 5.930600e+03 2.298540e+03 8.906710e+03 5.331680e+03 2.549910e+03 3.053560e+03 | ||||||
|  | 1332496830116667 2.608890e+05 2.250100e+05 4.681130e+03 2.971870e+03 7.900040e+03 4.874080e+03 2.322430e+03 3.649120e+03 | ||||||
|  | 1332496830125000 2.579440e+05 2.249230e+05 3.291140e+03 4.357090e+03 7.131590e+03 4.385560e+03 1.077050e+03 3.664040e+03 | ||||||
|  | 1332496830133333 2.550090e+05 2.230180e+05 4.584820e+03 2.864000e+03 8.469490e+03 3.625580e+03 9.855570e+02 3.504230e+03 | ||||||
|  | 1332496830141667 2.601140e+05 2.219470e+05 5.676190e+03 1.210340e+03 9.393780e+03 3.390240e+03 1.654020e+03 3.018700e+03 | ||||||
|  | 1332496830150000 2.642770e+05 2.244380e+05 4.446620e+03 2.176720e+03 8.142090e+03 4.584880e+03 2.327830e+03 2.615800e+03 | ||||||
|  | 1332496830158333 2.592210e+05 2.264710e+05 2.734440e+03 4.182760e+03 6.389550e+03 5.540520e+03 1.958880e+03 2.720120e+03 | ||||||
|  | 1332496830166667 2.526500e+05 2.248310e+05 4.163640e+03 2.989990e+03 7.179200e+03 5.213060e+03 1.929550e+03 3.457660e+03 | ||||||
|  | 1332496830175000 2.570830e+05 2.220480e+05 5.759040e+03 7.024410e+02 8.566550e+03 3.552020e+03 1.832940e+03 3.956190e+03 | ||||||
|  | 1332496830183333 2.631300e+05 2.229670e+05 5.141140e+03 1.166120e+03 8.666960e+03 2.720370e+03 9.713740e+02 3.479730e+03 | ||||||
|  | 1332496830191667 2.602360e+05 2.252650e+05 3.425140e+03 3.339080e+03 7.853610e+03 3.674950e+03 5.259080e+02 2.443310e+03 | ||||||
|  | 1332496830200000 2.535030e+05 2.245270e+05 4.398130e+03 2.927430e+03 8.110280e+03 4.842470e+03 1.513870e+03 2.467100e+03 | ||||||
|  | 1332496830208333 2.561260e+05 2.226930e+05 6.043530e+03 6.562240e+02 8.797560e+03 4.832410e+03 2.832370e+03 3.426140e+03 | ||||||
|  | 1332496830216667 2.616770e+05 2.236080e+05 5.830460e+03 1.033910e+03 8.123940e+03 3.980690e+03 1.927960e+03 4.092720e+03 | ||||||
|  | 1332496830225000 2.594570e+05 2.255360e+05 4.015570e+03 2.995990e+03 7.135440e+03 3.713550e+03 3.072200e+02 3.849430e+03 | ||||||
|  | 1332496830233333 2.533520e+05 2.242160e+05 4.650560e+03 3.196620e+03 8.131280e+03 3.586160e+03 7.083230e+01 3.074180e+03 | ||||||
|  | 1332496830241667 2.561240e+05 2.215130e+05 6.100480e+03 8.219800e+02 9.757540e+03 3.474510e+03 1.647520e+03 2.559860e+03 | ||||||
|  | 1332496830250000 2.630240e+05 2.215590e+05 5.789960e+03 6.994170e+02 9.129740e+03 4.153080e+03 2.829250e+03 2.677270e+03 | ||||||
|  | 1332496830258333 2.617200e+05 2.240150e+05 4.358500e+03 2.645360e+03 7.414110e+03 4.810670e+03 2.225990e+03 3.185990e+03 | ||||||
|  | 1332496830266667 2.547560e+05 2.242400e+05 4.857380e+03 3.229680e+03 7.539310e+03 4.769140e+03 1.507130e+03 3.668260e+03 | ||||||
|  | 1332496830275000 2.568890e+05 2.226580e+05 6.473420e+03 1.214110e+03 9.010760e+03 3.848730e+03 1.303840e+03 3.778500e+03 | ||||||
|  | 1332496830283333 2.642080e+05 2.233160e+05 5.700450e+03 1.116560e+03 9.087610e+03 3.846680e+03 1.293590e+03 2.891560e+03 | ||||||
|  | 1332496830291667 2.633100e+05 2.257190e+05 3.936120e+03 3.252360e+03 7.552850e+03 4.897860e+03 1.156630e+03 2.037160e+03 | ||||||
|  | 1332496830300000 2.550790e+05 2.250860e+05 4.536450e+03 3.960110e+03 7.454590e+03 5.479070e+03 1.596360e+03 2.190800e+03 | ||||||
|  | 1332496830308333 2.544870e+05 2.225080e+05 6.635860e+03 1.758850e+03 8.732970e+03 4.466970e+03 2.650360e+03 3.139310e+03 | ||||||
|  | 1332496830316667 2.612410e+05 2.224320e+05 6.702270e+03 1.085130e+03 8.989230e+03 3.112990e+03 1.933560e+03 3.828410e+03 | ||||||
|  | 1332496830325000 2.621190e+05 2.255870e+05 4.714950e+03 2.892360e+03 8.107820e+03 2.961310e+03 2.399780e+02 3.273720e+03 | ||||||
|  | 1332496830333333 2.549990e+05 2.265140e+05 4.532090e+03 4.126900e+03 8.200130e+03 3.872590e+03 5.608900e+01 2.370580e+03 | ||||||
|  | 1332496830341667 2.542890e+05 2.240330e+05 6.538810e+03 2.251440e+03 9.419430e+03 4.564450e+03 2.077810e+03 2.508170e+03 | ||||||
|  | 1332496830350000 2.618900e+05 2.219600e+05 6.846090e+03 1.475270e+03 9.125590e+03 4.598290e+03 3.299220e+03 3.475420e+03 | ||||||
|  | 1332496830358333 2.645020e+05 2.230850e+05 5.066380e+03 3.270560e+03 7.933170e+03 4.173710e+03 1.908910e+03 3.867460e+03 | ||||||
|  | 1332496830366667 2.578890e+05 2.236560e+05 4.201660e+03 4.473640e+03 7.688340e+03 4.161580e+03 6.875790e+02 3.653690e+03 | ||||||
|  | 1332496830375000 2.542700e+05 2.231510e+05 5.715140e+03 2.752140e+03 9.273320e+03 3.772950e+03 8.964040e+02 3.256060e+03 | ||||||
|  | 1332496830383333 2.582570e+05 2.242170e+05 6.114310e+03 1.856860e+03 9.604320e+03 4.200490e+03 1.764380e+03 2.939220e+03 | ||||||
|  | 1332496830391667 2.600200e+05 2.268680e+05 4.237530e+03 3.605880e+03 8.066220e+03 5.430250e+03 2.138580e+03 2.696710e+03 | ||||||
|  | 1332496830400000 2.550830e+05 2.259240e+05 3.350310e+03 4.853070e+03 7.045820e+03 5.925200e+03 1.893610e+03 2.897340e+03 | ||||||
|  | 1332496830408333 2.544530e+05 2.221270e+05 5.271330e+03 2.491500e+03 8.436680e+03 5.032080e+03 2.436050e+03 3.724590e+03 | ||||||
|  | 1332496830416667 2.625880e+05 2.199500e+05 5.994620e+03 7.892740e+02 9.029650e+03 3.515740e+03 1.953570e+03 4.014520e+03 | ||||||
|  | 1332496830425000 2.656100e+05 2.233330e+05 4.391410e+03 2.400960e+03 8.146460e+03 3.536960e+03 5.302320e+02 3.133920e+03 | ||||||
|  | 1332496830433333 2.574700e+05 2.269770e+05 2.975320e+03 4.633530e+03 7.278560e+03 4.640100e+03 -5.015020e+01 2.024960e+03 | ||||||
|  | 1332496830441667 2.506870e+05 2.263310e+05 4.517860e+03 3.183800e+03 8.072600e+03 5.281660e+03 1.605140e+03 2.335140e+03 | ||||||
|  | 1332496830450000 2.555630e+05 2.244950e+05 5.551000e+03 1.101300e+03 8.461490e+03 4.725700e+03 2.726670e+03 3.480540e+03 | ||||||
|  | 1332496830458333 2.613350e+05 2.246450e+05 4.764680e+03 1.557020e+03 7.833350e+03 3.524810e+03 1.577410e+03 4.038620e+03 | ||||||
|  | 1332496830466667 2.602690e+05 2.240080e+05 3.558030e+03 2.987610e+03 7.362440e+03 3.279230e+03 5.624420e+02 3.786550e+03 | ||||||
|  | 1332496830475000 2.574350e+05 2.217770e+05 4.972600e+03 2.166880e+03 8.481440e+03 3.328720e+03 1.037130e+03 3.271370e+03 | ||||||
|  | 1332496830483333 2.610460e+05 2.215500e+05 5.816180e+03 5.902170e+02 9.120930e+03 3.895400e+03 2.382670e+03 2.824170e+03 | ||||||
|  | 1332496830491667 2.627660e+05 2.244730e+05 4.835050e+03 1.785770e+03 7.880760e+03 4.745620e+03 2.443660e+03 3.229550e+03 | ||||||
|  | 1332496830500000 2.565090e+05 2.264130e+05 3.758870e+03 3.461200e+03 6.743770e+03 4.928960e+03 1.536620e+03 3.546690e+03 | ||||||
|  | 1332496830508333 2.507930e+05 2.243720e+05 5.218490e+03 2.865260e+03 7.803960e+03 4.351090e+03 1.333820e+03 3.680490e+03 | ||||||
|  | 1332496830516667 2.563190e+05 2.220660e+05 6.403970e+03 7.323450e+02 9.627760e+03 3.089300e+03 1.516780e+03 3.653690e+03 | ||||||
|  | 1332496830525000 2.633430e+05 2.232350e+05 5.200430e+03 1.388580e+03 9.372850e+03 3.371230e+03 1.450390e+03 2.678910e+03 | ||||||
|  | 1332496830533333 2.609030e+05 2.251100e+05 3.722580e+03 3.246660e+03 7.876540e+03 4.716810e+03 1.498440e+03 2.116520e+03 | ||||||
|  | 1332496830541667 2.544160e+05 2.237690e+05 4.841650e+03 2.956400e+03 8.115920e+03 5.392360e+03 2.142810e+03 2.652320e+03 | ||||||
|  | 1332496830550000 2.566980e+05 2.221720e+05 6.471230e+03 9.703960e+02 8.834980e+03 4.816840e+03 2.376630e+03 3.605860e+03 | ||||||
|  | 1332496830558333 2.618410e+05 2.235370e+05 5.500740e+03 1.189660e+03 8.365730e+03 4.016470e+03 1.042270e+03 3.821200e+03 | ||||||
|  | 1332496830566667 2.595030e+05 2.258400e+05 3.827930e+03 3.088840e+03 7.676140e+03 3.978310e+03 -3.570070e+02 3.016420e+03 | ||||||
|  | 1332496830575000 2.534570e+05 2.246360e+05 4.914610e+03 3.097450e+03 8.224900e+03 4.321440e+03 1.713740e+02 2.412360e+03 | ||||||
|  | 1332496830583333 2.560290e+05 2.222210e+05 6.841800e+03 1.028500e+03 9.252300e+03 4.387570e+03 2.418140e+03 2.510100e+03 | ||||||
|  | 1332496830591667 2.628400e+05 2.225500e+05 6.210250e+03 1.410730e+03 8.538900e+03 4.152580e+03 3.009300e+03 3.219760e+03 | ||||||
|  | 1332496830600000 2.616330e+05 2.250650e+05 4.284530e+03 3.357210e+03 7.282170e+03 3.823590e+03 1.402840e+03 3.644670e+03 | ||||||
|  | 1332496830608333 2.545910e+05 2.251090e+05 4.693160e+03 3.647740e+03 7.745160e+03 3.686380e+03 4.901610e+02 3.448860e+03 | ||||||
|  | 1332496830616667 2.547800e+05 2.235990e+05 6.527380e+03 1.569870e+03 9.438430e+03 3.456580e+03 1.162520e+03 3.252010e+03 | ||||||
|  | 1332496830625000 2.606390e+05 2.241070e+05 6.531050e+03 1.633050e+03 9.283720e+03 4.174020e+03 2.089550e+03 2.775750e+03 | ||||||
|  | 1332496830633333 2.611080e+05 2.254720e+05 4.968260e+03 3.527850e+03 7.692870e+03 5.137100e+03 2.207390e+03 2.436660e+03 | ||||||
|  | 1332496830641667 2.557750e+05 2.237080e+05 4.963450e+03 4.017370e+03 7.701420e+03 5.269650e+03 2.284400e+03 2.842080e+03 | ||||||
|  | 1332496830650000 2.573980e+05 2.209470e+05 6.767500e+03 1.645710e+03 9.107070e+03 4.000180e+03 2.548860e+03 3.624770e+03 | ||||||
|  | 1332496830658333 2.649240e+05 2.215590e+05 6.471460e+03 1.110330e+03 9.459650e+03 3.108170e+03 1.696970e+03 3.893440e+03 | ||||||
|  | 1332496830666667 2.653390e+05 2.257330e+05 4.348800e+03 3.459510e+03 8.475300e+03 4.031240e+03 5.733470e+02 2.910270e+03 | ||||||
|  | 1332496830675000 2.568140e+05 2.269950e+05 3.479540e+03 4.949790e+03 7.499910e+03 5.624710e+03 7.516560e+02 2.347710e+03 | ||||||
|  | 1332496830683333 2.533160e+05 2.251610e+05 5.147060e+03 3.218430e+03 8.460160e+03 5.869300e+03 2.336320e+03 2.987960e+03 | ||||||
|  | 1332496830691667 2.593600e+05 2.231010e+05 5.549120e+03 1.869950e+03 8.740760e+03 4.668940e+03 2.457910e+03 3.758820e+03 | ||||||
|  | 1332496830700000 2.620120e+05 2.240160e+05 4.173610e+03 3.004130e+03 8.157040e+03 3.704730e+03 9.879640e+02 3.652750e+03 | ||||||
|  | 1332496830708333 2.571760e+05 2.244200e+05 3.517300e+03 4.118750e+03 7.822240e+03 3.718230e+03 3.726490e+01 2.953680e+03 | ||||||
|  | 1332496830716667 2.551460e+05 2.233220e+05 4.923980e+03 2.330680e+03 9.095910e+03 3.792400e+03 1.013070e+03 2.711240e+03 | ||||||
|  | 1332496830725000 2.605240e+05 2.236510e+05 5.413630e+03 1.146210e+03 8.817170e+03 4.419650e+03 2.446650e+03 2.832050e+03 | ||||||
|  | 1332496830733333 2.620980e+05 2.257520e+05 4.262980e+03 2.270970e+03 7.135480e+03 5.067120e+03 2.294680e+03 3.376620e+03 | ||||||
|  | 1332496830741667 2.568890e+05 2.253790e+05 3.606460e+03 3.568190e+03 6.552650e+03 4.970270e+03 1.516380e+03 3.662570e+03 | ||||||
|  | 1332496830750000 2.539480e+05 2.226310e+05 5.511700e+03 2.066300e+03 7.952660e+03 4.019910e+03 1.513140e+03 3.752630e+03 | ||||||
|  | 1332496830758333 2.597990e+05 2.220670e+05 5.873500e+03 6.085840e+02 9.253780e+03 2.870740e+03 1.348240e+03 3.344200e+03 | ||||||
|  | 1332496830766667 2.625470e+05 2.249010e+05 4.346080e+03 1.928100e+03 8.590970e+03 3.455460e+03 9.043910e+02 2.379270e+03 | ||||||
|  | 1332496830775000 2.561370e+05 2.267610e+05 3.423560e+03 3.379080e+03 7.471150e+03 4.894170e+03 1.153540e+03 2.031410e+03 | ||||||
|  | 1332496830783333 2.503260e+05 2.250130e+05 5.519980e+03 2.423970e+03 7.991760e+03 5.117950e+03 2.098790e+03 3.099240e+03 | ||||||
|  | 1332496830791667 2.554540e+05 2.229920e+05 6.547950e+03 4.964960e+02 8.751340e+03 3.900560e+03 2.132290e+03 4.076810e+03 | ||||||
|  | 1332496830800000 2.612860e+05 2.234890e+05 5.152850e+03 1.501510e+03 8.425610e+03 2.888030e+03 7.761140e+02 3.786360e+03 | ||||||
|  | 1332496830808333 2.589690e+05 2.240690e+05 3.832610e+03 3.001980e+03 7.979260e+03 3.182310e+03 5.271600e+01 2.874800e+03 | ||||||
|  | 1332496830816667 2.549460e+05 2.220350e+05 5.317880e+03 2.139800e+03 9.103140e+03 3.955610e+03 1.235170e+03 2.394150e+03 | ||||||
|  | 1332496830825000 2.586760e+05 2.212050e+05 6.594910e+03 5.053440e+02 9.423360e+03 4.562470e+03 2.913740e+03 2.892350e+03 | ||||||
|  | 1332496830833333 2.621250e+05 2.235660e+05 5.116750e+03 1.773600e+03 8.082200e+03 4.776370e+03 2.386390e+03 3.659730e+03 | ||||||
|  | 1332496830841667 2.578350e+05 2.259180e+05 3.714300e+03 3.477080e+03 7.205370e+03 4.554610e+03 7.115390e+02 3.878420e+03 | ||||||
|  | 1332496830850000 2.536600e+05 2.243710e+05 5.022450e+03 2.592430e+03 8.277200e+03 4.119370e+03 4.865080e+02 3.666740e+03 | ||||||
|  | 1332496830858333 2.595030e+05 2.220610e+05 6.589950e+03 6.599360e+02 9.596920e+03 3.598100e+03 1.702490e+03 3.036600e+03 | ||||||
|  | 1332496830866667 2.654950e+05 2.228430e+05 5.541850e+03 1.728430e+03 8.459960e+03 4.492000e+03 2.231970e+03 2.430620e+03 | ||||||
|  | 1332496830875000 2.609290e+05 2.249960e+05 4.000950e+03 3.745990e+03 6.983790e+03 5.430860e+03 1.855260e+03 2.533380e+03 | ||||||
|  | 1332496830883333 2.527160e+05 2.243350e+05 5.086560e+03 3.401150e+03 7.597970e+03 5.196120e+03 1.755720e+03 3.079760e+03 | ||||||
|  | 1332496830891667 2.541100e+05 2.231110e+05 6.822190e+03 1.229080e+03 9.164340e+03 3.761230e+03 1.679390e+03 3.584880e+03 | ||||||
|  | 1332496830900000 2.599690e+05 2.246930e+05 6.183950e+03 1.538500e+03 9.222080e+03 3.139170e+03 9.499020e+02 3.180800e+03 | ||||||
|  | 1332496830908333 2.590780e+05 2.269130e+05 4.388890e+03 3.694820e+03 8.195020e+03 3.933000e+03 4.260800e+02 2.388450e+03 | ||||||
|  | 1332496830916667 2.545630e+05 2.247600e+05 5.168440e+03 4.020940e+03 8.450270e+03 4.758910e+03 1.458900e+03 2.286430e+03 | ||||||
|  | 1332496830925000 2.580590e+05 2.212170e+05 6.883460e+03 1.649530e+03 9.232780e+03 4.457650e+03 3.057820e+03 3.031950e+03 | ||||||
|  | 1332496830933333 2.646670e+05 2.211770e+05 6.218510e+03 1.645730e+03 8.657180e+03 3.663500e+03 2.528280e+03 3.978340e+03 | ||||||
|  | 1332496830941667 2.629250e+05 2.243820e+05 4.627500e+03 3.635930e+03 7.892800e+03 3.431320e+03 6.045090e+02 3.901370e+03 | ||||||
|  | 1332496830950000 2.547080e+05 2.254480e+05 4.408250e+03 4.461040e+03 8.197170e+03 3.953750e+03 -4.453460e+01 3.154870e+03 | ||||||
|  | 1332496830958333 2.537020e+05 2.246350e+05 5.825770e+03 2.577050e+03 9.590050e+03 4.569250e+03 1.460270e+03 2.785170e+03 | ||||||
|  | 1332496830966667 2.602060e+05 2.241400e+05 5.387980e+03 1.951160e+03 8.789510e+03 5.131660e+03 2.706380e+03 2.972480e+03 | ||||||
|  | 1332496830975000 2.612400e+05 2.247370e+05 3.860810e+03 3.418310e+03 7.414530e+03 5.284520e+03 2.271380e+03 3.183150e+03 | ||||||
|  | 1332496830983333 2.561400e+05 2.232520e+05 3.850010e+03 3.957140e+03 7.262650e+03 4.964640e+03 1.499510e+03 3.453130e+03 | ||||||
|  | 1332496830991667 2.561160e+05 2.213490e+05 5.594480e+03 2.054400e+03 8.835130e+03 3.662010e+03 1.485510e+03 3.613010e+03 | ||||||
							
								
								
									
										1
									
								
								tests/data/extract-3
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								tests/data/extract-3
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | |||||||
|  | 1332496830008333 2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03 | ||||||
							
								
								
									
										2
									
								
								tests/data/extract-4
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								tests/data/extract-4
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,2 @@ | |||||||
|  | 1332496830008333 2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03 | ||||||
|  | 1332496830016667 2.630730e+05 2.233040e+05 4.961640e+03 2.197120e+03 7.687310e+03 4.861860e+03 2.732780e+03 3.008540e+03 | ||||||
							
								
								
									
										124
									
								
								tests/data/extract-5
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										124
									
								
								tests/data/extract-5
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,124 @@ | |||||||
|  | # path: /newton/prep | ||||||
|  | # layout: float32_8 | ||||||
|  | # start: Fri, 23 Mar 2012 10:00:30.000000 +0000 | ||||||
|  | # end: Fri, 23 Mar 2012 10:00:31.000000 +0000 | ||||||
|  | 2.517740e+05 2.242410e+05 5.688100e+03 1.915530e+03 9.329220e+03 4.183710e+03 1.212350e+03 2.641790e+03 | ||||||
|  | 2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03 | ||||||
|  | 2.630730e+05 2.233040e+05 4.961640e+03 2.197120e+03 7.687310e+03 4.861860e+03 2.732780e+03 3.008540e+03 | ||||||
|  | 2.576140e+05 2.233230e+05 5.003660e+03 3.525140e+03 7.165310e+03 4.685620e+03 1.715380e+03 3.440480e+03 | ||||||
|  | 2.557800e+05 2.219150e+05 6.357310e+03 2.145290e+03 8.426970e+03 3.775350e+03 1.475390e+03 3.797240e+03 | ||||||
|  | 2.601660e+05 2.230080e+05 6.702590e+03 1.484960e+03 9.288100e+03 3.330830e+03 1.228500e+03 3.214320e+03 | ||||||
|  | 2.612310e+05 2.264260e+05 4.980060e+03 2.982380e+03 8.499630e+03 4.267670e+03 9.940890e+02 2.292890e+03 | ||||||
|  | 2.551170e+05 2.266420e+05 4.584410e+03 4.656440e+03 7.860150e+03 5.317310e+03 1.473600e+03 2.111690e+03 | ||||||
|  | 2.533000e+05 2.235540e+05 6.455090e+03 3.036650e+03 8.869750e+03 4.986310e+03 2.607360e+03 2.839590e+03 | ||||||
|  | 2.610610e+05 2.212630e+05 6.951980e+03 1.500240e+03 9.386100e+03 3.791680e+03 2.677010e+03 3.980630e+03 | ||||||
|  | 2.665030e+05 2.231980e+05 5.189610e+03 2.594560e+03 8.571530e+03 3.175000e+03 9.198400e+02 3.792010e+03 | ||||||
|  | 2.606920e+05 2.251840e+05 3.782480e+03 4.642880e+03 7.662960e+03 3.917790e+03 -2.510970e+02 2.907060e+03 | ||||||
|  | 2.539630e+05 2.250810e+05 5.123530e+03 3.839550e+03 8.669030e+03 4.877820e+03 9.437240e+02 2.527450e+03 | ||||||
|  | 2.565550e+05 2.241690e+05 5.930600e+03 2.298540e+03 8.906710e+03 5.331680e+03 2.549910e+03 3.053560e+03 | ||||||
|  | 2.608890e+05 2.250100e+05 4.681130e+03 2.971870e+03 7.900040e+03 4.874080e+03 2.322430e+03 3.649120e+03 | ||||||
|  | 2.579440e+05 2.249230e+05 3.291140e+03 4.357090e+03 7.131590e+03 4.385560e+03 1.077050e+03 3.664040e+03 | ||||||
|  | 2.550090e+05 2.230180e+05 4.584820e+03 2.864000e+03 8.469490e+03 3.625580e+03 9.855570e+02 3.504230e+03 | ||||||
|  | 2.601140e+05 2.219470e+05 5.676190e+03 1.210340e+03 9.393780e+03 3.390240e+03 1.654020e+03 3.018700e+03 | ||||||
|  | 2.642770e+05 2.244380e+05 4.446620e+03 2.176720e+03 8.142090e+03 4.584880e+03 2.327830e+03 2.615800e+03 | ||||||
|  | 2.592210e+05 2.264710e+05 2.734440e+03 4.182760e+03 6.389550e+03 5.540520e+03 1.958880e+03 2.720120e+03 | ||||||
|  | 2.526500e+05 2.248310e+05 4.163640e+03 2.989990e+03 7.179200e+03 5.213060e+03 1.929550e+03 3.457660e+03 | ||||||
|  | 2.570830e+05 2.220480e+05 5.759040e+03 7.024410e+02 8.566550e+03 3.552020e+03 1.832940e+03 3.956190e+03 | ||||||
|  | 2.631300e+05 2.229670e+05 5.141140e+03 1.166120e+03 8.666960e+03 2.720370e+03 9.713740e+02 3.479730e+03 | ||||||
|  | 2.602360e+05 2.252650e+05 3.425140e+03 3.339080e+03 7.853610e+03 3.674950e+03 5.259080e+02 2.443310e+03 | ||||||
|  | 2.535030e+05 2.245270e+05 4.398130e+03 2.927430e+03 8.110280e+03 4.842470e+03 1.513870e+03 2.467100e+03 | ||||||
|  | 2.561260e+05 2.226930e+05 6.043530e+03 6.562240e+02 8.797560e+03 4.832410e+03 2.832370e+03 3.426140e+03 | ||||||
|  | 2.616770e+05 2.236080e+05 5.830460e+03 1.033910e+03 8.123940e+03 3.980690e+03 1.927960e+03 4.092720e+03 | ||||||
|  | 2.594570e+05 2.255360e+05 4.015570e+03 2.995990e+03 7.135440e+03 3.713550e+03 3.072200e+02 3.849430e+03 | ||||||
|  | 2.533520e+05 2.242160e+05 4.650560e+03 3.196620e+03 8.131280e+03 3.586160e+03 7.083230e+01 3.074180e+03 | ||||||
|  | 2.561240e+05 2.215130e+05 6.100480e+03 8.219800e+02 9.757540e+03 3.474510e+03 1.647520e+03 2.559860e+03 | ||||||
|  | 2.630240e+05 2.215590e+05 5.789960e+03 6.994170e+02 9.129740e+03 4.153080e+03 2.829250e+03 2.677270e+03 | ||||||
|  | 2.617200e+05 2.240150e+05 4.358500e+03 2.645360e+03 7.414110e+03 4.810670e+03 2.225990e+03 3.185990e+03 | ||||||
|  | 2.547560e+05 2.242400e+05 4.857380e+03 3.229680e+03 7.539310e+03 4.769140e+03 1.507130e+03 3.668260e+03 | ||||||
|  | 2.568890e+05 2.226580e+05 6.473420e+03 1.214110e+03 9.010760e+03 3.848730e+03 1.303840e+03 3.778500e+03 | ||||||
|  | 2.642080e+05 2.233160e+05 5.700450e+03 1.116560e+03 9.087610e+03 3.846680e+03 1.293590e+03 2.891560e+03 | ||||||
|  | 2.633100e+05 2.257190e+05 3.936120e+03 3.252360e+03 7.552850e+03 4.897860e+03 1.156630e+03 2.037160e+03 | ||||||
|  | 2.550790e+05 2.250860e+05 4.536450e+03 3.960110e+03 7.454590e+03 5.479070e+03 1.596360e+03 2.190800e+03 | ||||||
|  | 2.544870e+05 2.225080e+05 6.635860e+03 1.758850e+03 8.732970e+03 4.466970e+03 2.650360e+03 3.139310e+03 | ||||||
|  | 2.612410e+05 2.224320e+05 6.702270e+03 1.085130e+03 8.989230e+03 3.112990e+03 1.933560e+03 3.828410e+03 | ||||||
|  | 2.621190e+05 2.255870e+05 4.714950e+03 2.892360e+03 8.107820e+03 2.961310e+03 2.399780e+02 3.273720e+03 | ||||||
|  | 2.549990e+05 2.265140e+05 4.532090e+03 4.126900e+03 8.200130e+03 3.872590e+03 5.608900e+01 2.370580e+03 | ||||||
|  | 2.542890e+05 2.240330e+05 6.538810e+03 2.251440e+03 9.419430e+03 4.564450e+03 2.077810e+03 2.508170e+03 | ||||||
|  | 2.618900e+05 2.219600e+05 6.846090e+03 1.475270e+03 9.125590e+03 4.598290e+03 3.299220e+03 3.475420e+03 | ||||||
|  | 2.645020e+05 2.230850e+05 5.066380e+03 3.270560e+03 7.933170e+03 4.173710e+03 1.908910e+03 3.867460e+03 | ||||||
|  | 2.578890e+05 2.236560e+05 4.201660e+03 4.473640e+03 7.688340e+03 4.161580e+03 6.875790e+02 3.653690e+03 | ||||||
|  | 2.542700e+05 2.231510e+05 5.715140e+03 2.752140e+03 9.273320e+03 3.772950e+03 8.964040e+02 3.256060e+03 | ||||||
|  | 2.582570e+05 2.242170e+05 6.114310e+03 1.856860e+03 9.604320e+03 4.200490e+03 1.764380e+03 2.939220e+03 | ||||||
|  | 2.600200e+05 2.268680e+05 4.237530e+03 3.605880e+03 8.066220e+03 5.430250e+03 2.138580e+03 2.696710e+03 | ||||||
|  | 2.550830e+05 2.259240e+05 3.350310e+03 4.853070e+03 7.045820e+03 5.925200e+03 1.893610e+03 2.897340e+03 | ||||||
|  | 2.544530e+05 2.221270e+05 5.271330e+03 2.491500e+03 8.436680e+03 5.032080e+03 2.436050e+03 3.724590e+03 | ||||||
|  | 2.625880e+05 2.199500e+05 5.994620e+03 7.892740e+02 9.029650e+03 3.515740e+03 1.953570e+03 4.014520e+03 | ||||||
|  | 2.656100e+05 2.233330e+05 4.391410e+03 2.400960e+03 8.146460e+03 3.536960e+03 5.302320e+02 3.133920e+03 | ||||||
|  | 2.574700e+05 2.269770e+05 2.975320e+03 4.633530e+03 7.278560e+03 4.640100e+03 -5.015020e+01 2.024960e+03 | ||||||
|  | 2.506870e+05 2.263310e+05 4.517860e+03 3.183800e+03 8.072600e+03 5.281660e+03 1.605140e+03 2.335140e+03 | ||||||
|  | 2.555630e+05 2.244950e+05 5.551000e+03 1.101300e+03 8.461490e+03 4.725700e+03 2.726670e+03 3.480540e+03 | ||||||
|  | 2.613350e+05 2.246450e+05 4.764680e+03 1.557020e+03 7.833350e+03 3.524810e+03 1.577410e+03 4.038620e+03 | ||||||
|  | 2.602690e+05 2.240080e+05 3.558030e+03 2.987610e+03 7.362440e+03 3.279230e+03 5.624420e+02 3.786550e+03 | ||||||
|  | 2.574350e+05 2.217770e+05 4.972600e+03 2.166880e+03 8.481440e+03 3.328720e+03 1.037130e+03 3.271370e+03 | ||||||
|  | 2.610460e+05 2.215500e+05 5.816180e+03 5.902170e+02 9.120930e+03 3.895400e+03 2.382670e+03 2.824170e+03 | ||||||
|  | 2.627660e+05 2.244730e+05 4.835050e+03 1.785770e+03 7.880760e+03 4.745620e+03 2.443660e+03 3.229550e+03 | ||||||
|  | 2.565090e+05 2.264130e+05 3.758870e+03 3.461200e+03 6.743770e+03 4.928960e+03 1.536620e+03 3.546690e+03 | ||||||
|  | 2.507930e+05 2.243720e+05 5.218490e+03 2.865260e+03 7.803960e+03 4.351090e+03 1.333820e+03 3.680490e+03 | ||||||
|  | 2.563190e+05 2.220660e+05 6.403970e+03 7.323450e+02 9.627760e+03 3.089300e+03 1.516780e+03 3.653690e+03 | ||||||
|  | 2.633430e+05 2.232350e+05 5.200430e+03 1.388580e+03 9.372850e+03 3.371230e+03 1.450390e+03 2.678910e+03 | ||||||
|  | 2.609030e+05 2.251100e+05 3.722580e+03 3.246660e+03 7.876540e+03 4.716810e+03 1.498440e+03 2.116520e+03 | ||||||
|  | 2.544160e+05 2.237690e+05 4.841650e+03 2.956400e+03 8.115920e+03 5.392360e+03 2.142810e+03 2.652320e+03 | ||||||
|  | 2.566980e+05 2.221720e+05 6.471230e+03 9.703960e+02 8.834980e+03 4.816840e+03 2.376630e+03 3.605860e+03 | ||||||
|  | 2.618410e+05 2.235370e+05 5.500740e+03 1.189660e+03 8.365730e+03 4.016470e+03 1.042270e+03 3.821200e+03 | ||||||
|  | 2.595030e+05 2.258400e+05 3.827930e+03 3.088840e+03 7.676140e+03 3.978310e+03 -3.570070e+02 3.016420e+03 | ||||||
|  | 2.534570e+05 2.246360e+05 4.914610e+03 3.097450e+03 8.224900e+03 4.321440e+03 1.713740e+02 2.412360e+03 | ||||||
|  | 2.560290e+05 2.222210e+05 6.841800e+03 1.028500e+03 9.252300e+03 4.387570e+03 2.418140e+03 2.510100e+03 | ||||||
|  | 2.628400e+05 2.225500e+05 6.210250e+03 1.410730e+03 8.538900e+03 4.152580e+03 3.009300e+03 3.219760e+03 | ||||||
|  | 2.616330e+05 2.250650e+05 4.284530e+03 3.357210e+03 7.282170e+03 3.823590e+03 1.402840e+03 3.644670e+03 | ||||||
|  | 2.545910e+05 2.251090e+05 4.693160e+03 3.647740e+03 7.745160e+03 3.686380e+03 4.901610e+02 3.448860e+03 | ||||||
|  | 2.547800e+05 2.235990e+05 6.527380e+03 1.569870e+03 9.438430e+03 3.456580e+03 1.162520e+03 3.252010e+03 | ||||||
|  | 2.606390e+05 2.241070e+05 6.531050e+03 1.633050e+03 9.283720e+03 4.174020e+03 2.089550e+03 2.775750e+03 | ||||||
|  | 2.611080e+05 2.254720e+05 4.968260e+03 3.527850e+03 7.692870e+03 5.137100e+03 2.207390e+03 2.436660e+03 | ||||||
|  | 2.557750e+05 2.237080e+05 4.963450e+03 4.017370e+03 7.701420e+03 5.269650e+03 2.284400e+03 2.842080e+03 | ||||||
|  | 2.573980e+05 2.209470e+05 6.767500e+03 1.645710e+03 9.107070e+03 4.000180e+03 2.548860e+03 3.624770e+03 | ||||||
|  | 2.649240e+05 2.215590e+05 6.471460e+03 1.110330e+03 9.459650e+03 3.108170e+03 1.696970e+03 3.893440e+03 | ||||||
|  | 2.653390e+05 2.257330e+05 4.348800e+03 3.459510e+03 8.475300e+03 4.031240e+03 5.733470e+02 2.910270e+03 | ||||||
|  | 2.568140e+05 2.269950e+05 3.479540e+03 4.949790e+03 7.499910e+03 5.624710e+03 7.516560e+02 2.347710e+03 | ||||||
|  | 2.533160e+05 2.251610e+05 5.147060e+03 3.218430e+03 8.460160e+03 5.869300e+03 2.336320e+03 2.987960e+03 | ||||||
|  | 2.593600e+05 2.231010e+05 5.549120e+03 1.869950e+03 8.740760e+03 4.668940e+03 2.457910e+03 3.758820e+03 | ||||||
|  | 2.620120e+05 2.240160e+05 4.173610e+03 3.004130e+03 8.157040e+03 3.704730e+03 9.879640e+02 3.652750e+03 | ||||||
|  | 2.571760e+05 2.244200e+05 3.517300e+03 4.118750e+03 7.822240e+03 3.718230e+03 3.726490e+01 2.953680e+03 | ||||||
|  | 2.551460e+05 2.233220e+05 4.923980e+03 2.330680e+03 9.095910e+03 3.792400e+03 1.013070e+03 2.711240e+03 | ||||||
|  | 2.605240e+05 2.236510e+05 5.413630e+03 1.146210e+03 8.817170e+03 4.419650e+03 2.446650e+03 2.832050e+03 | ||||||
|  | 2.620980e+05 2.257520e+05 4.262980e+03 2.270970e+03 7.135480e+03 5.067120e+03 2.294680e+03 3.376620e+03 | ||||||
|  | 2.568890e+05 2.253790e+05 3.606460e+03 3.568190e+03 6.552650e+03 4.970270e+03 1.516380e+03 3.662570e+03 | ||||||
|  | 2.539480e+05 2.226310e+05 5.511700e+03 2.066300e+03 7.952660e+03 4.019910e+03 1.513140e+03 3.752630e+03 | ||||||
|  | 2.597990e+05 2.220670e+05 5.873500e+03 6.085840e+02 9.253780e+03 2.870740e+03 1.348240e+03 3.344200e+03 | ||||||
|  | 2.625470e+05 2.249010e+05 4.346080e+03 1.928100e+03 8.590970e+03 3.455460e+03 9.043910e+02 2.379270e+03 | ||||||
|  | 2.561370e+05 2.267610e+05 3.423560e+03 3.379080e+03 7.471150e+03 4.894170e+03 1.153540e+03 2.031410e+03 | ||||||
|  | 2.503260e+05 2.250130e+05 5.519980e+03 2.423970e+03 7.991760e+03 5.117950e+03 2.098790e+03 3.099240e+03 | ||||||
|  | 2.554540e+05 2.229920e+05 6.547950e+03 4.964960e+02 8.751340e+03 3.900560e+03 2.132290e+03 4.076810e+03 | ||||||
|  | 2.612860e+05 2.234890e+05 5.152850e+03 1.501510e+03 8.425610e+03 2.888030e+03 7.761140e+02 3.786360e+03 | ||||||
|  | 2.589690e+05 2.240690e+05 3.832610e+03 3.001980e+03 7.979260e+03 3.182310e+03 5.271600e+01 2.874800e+03 | ||||||
|  | 2.549460e+05 2.220350e+05 5.317880e+03 2.139800e+03 9.103140e+03 3.955610e+03 1.235170e+03 2.394150e+03 | ||||||
|  | 2.586760e+05 2.212050e+05 6.594910e+03 5.053440e+02 9.423360e+03 4.562470e+03 2.913740e+03 2.892350e+03 | ||||||
|  | 2.621250e+05 2.235660e+05 5.116750e+03 1.773600e+03 8.082200e+03 4.776370e+03 2.386390e+03 3.659730e+03 | ||||||
|  | 2.578350e+05 2.259180e+05 3.714300e+03 3.477080e+03 7.205370e+03 4.554610e+03 7.115390e+02 3.878420e+03 | ||||||
|  | 2.536600e+05 2.243710e+05 5.022450e+03 2.592430e+03 8.277200e+03 4.119370e+03 4.865080e+02 3.666740e+03 | ||||||
|  | 2.595030e+05 2.220610e+05 6.589950e+03 6.599360e+02 9.596920e+03 3.598100e+03 1.702490e+03 3.036600e+03 | ||||||
|  | 2.654950e+05 2.228430e+05 5.541850e+03 1.728430e+03 8.459960e+03 4.492000e+03 2.231970e+03 2.430620e+03 | ||||||
|  | 2.609290e+05 2.249960e+05 4.000950e+03 3.745990e+03 6.983790e+03 5.430860e+03 1.855260e+03 2.533380e+03 | ||||||
|  | 2.527160e+05 2.243350e+05 5.086560e+03 3.401150e+03 7.597970e+03 5.196120e+03 1.755720e+03 3.079760e+03 | ||||||
|  | 2.541100e+05 2.231110e+05 6.822190e+03 1.229080e+03 9.164340e+03 3.761230e+03 1.679390e+03 3.584880e+03 | ||||||
|  | 2.599690e+05 2.246930e+05 6.183950e+03 1.538500e+03 9.222080e+03 3.139170e+03 9.499020e+02 3.180800e+03 | ||||||
|  | 2.590780e+05 2.269130e+05 4.388890e+03 3.694820e+03 8.195020e+03 3.933000e+03 4.260800e+02 2.388450e+03 | ||||||
|  | 2.545630e+05 2.247600e+05 5.168440e+03 4.020940e+03 8.450270e+03 4.758910e+03 1.458900e+03 2.286430e+03 | ||||||
|  | 2.580590e+05 2.212170e+05 6.883460e+03 1.649530e+03 9.232780e+03 4.457650e+03 3.057820e+03 3.031950e+03 | ||||||
|  | 2.646670e+05 2.211770e+05 6.218510e+03 1.645730e+03 8.657180e+03 3.663500e+03 2.528280e+03 3.978340e+03 | ||||||
|  | 2.629250e+05 2.243820e+05 4.627500e+03 3.635930e+03 7.892800e+03 3.431320e+03 6.045090e+02 3.901370e+03 | ||||||
|  | 2.547080e+05 2.254480e+05 4.408250e+03 4.461040e+03 8.197170e+03 3.953750e+03 -4.453460e+01 3.154870e+03 | ||||||
|  | 2.537020e+05 2.246350e+05 5.825770e+03 2.577050e+03 9.590050e+03 4.569250e+03 1.460270e+03 2.785170e+03 | ||||||
|  | 2.602060e+05 2.241400e+05 5.387980e+03 1.951160e+03 8.789510e+03 5.131660e+03 2.706380e+03 2.972480e+03 | ||||||
|  | 2.612400e+05 2.247370e+05 3.860810e+03 3.418310e+03 7.414530e+03 5.284520e+03 2.271380e+03 3.183150e+03 | ||||||
|  | 2.561400e+05 2.232520e+05 3.850010e+03 3.957140e+03 7.262650e+03 4.964640e+03 1.499510e+03 3.453130e+03 | ||||||
|  | 2.561160e+05 2.213490e+05 5.594480e+03 2.054400e+03 8.835130e+03 3.662010e+03 1.485510e+03 3.613010e+03 | ||||||
							
								
								
									
										120
									
								
								tests/data/extract-6
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										120
									
								
								tests/data/extract-6
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,120 @@ | |||||||
|  | 2.517740e+05 2.242410e+05 5.688100e+03 1.915530e+03 9.329220e+03 4.183710e+03 1.212350e+03 2.641790e+03 | ||||||
|  | 2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03 | ||||||
|  | 2.630730e+05 2.233040e+05 4.961640e+03 2.197120e+03 7.687310e+03 4.861860e+03 2.732780e+03 3.008540e+03 | ||||||
|  | 2.576140e+05 2.233230e+05 5.003660e+03 3.525140e+03 7.165310e+03 4.685620e+03 1.715380e+03 3.440480e+03 | ||||||
|  | 2.557800e+05 2.219150e+05 6.357310e+03 2.145290e+03 8.426970e+03 3.775350e+03 1.475390e+03 3.797240e+03 | ||||||
|  | 2.601660e+05 2.230080e+05 6.702590e+03 1.484960e+03 9.288100e+03 3.330830e+03 1.228500e+03 3.214320e+03 | ||||||
|  | 2.612310e+05 2.264260e+05 4.980060e+03 2.982380e+03 8.499630e+03 4.267670e+03 9.940890e+02 2.292890e+03 | ||||||
|  | 2.551170e+05 2.266420e+05 4.584410e+03 4.656440e+03 7.860150e+03 5.317310e+03 1.473600e+03 2.111690e+03 | ||||||
|  | 2.533000e+05 2.235540e+05 6.455090e+03 3.036650e+03 8.869750e+03 4.986310e+03 2.607360e+03 2.839590e+03 | ||||||
|  | 2.610610e+05 2.212630e+05 6.951980e+03 1.500240e+03 9.386100e+03 3.791680e+03 2.677010e+03 3.980630e+03 | ||||||
|  | 2.665030e+05 2.231980e+05 5.189610e+03 2.594560e+03 8.571530e+03 3.175000e+03 9.198400e+02 3.792010e+03 | ||||||
|  | 2.606920e+05 2.251840e+05 3.782480e+03 4.642880e+03 7.662960e+03 3.917790e+03 -2.510970e+02 2.907060e+03 | ||||||
|  | 2.539630e+05 2.250810e+05 5.123530e+03 3.839550e+03 8.669030e+03 4.877820e+03 9.437240e+02 2.527450e+03 | ||||||
|  | 2.565550e+05 2.241690e+05 5.930600e+03 2.298540e+03 8.906710e+03 5.331680e+03 2.549910e+03 3.053560e+03 | ||||||
|  | 2.608890e+05 2.250100e+05 4.681130e+03 2.971870e+03 7.900040e+03 4.874080e+03 2.322430e+03 3.649120e+03 | ||||||
|  | 2.579440e+05 2.249230e+05 3.291140e+03 4.357090e+03 7.131590e+03 4.385560e+03 1.077050e+03 3.664040e+03 | ||||||
|  | 2.550090e+05 2.230180e+05 4.584820e+03 2.864000e+03 8.469490e+03 3.625580e+03 9.855570e+02 3.504230e+03 | ||||||
|  | 2.601140e+05 2.219470e+05 5.676190e+03 1.210340e+03 9.393780e+03 3.390240e+03 1.654020e+03 3.018700e+03 | ||||||
|  | 2.642770e+05 2.244380e+05 4.446620e+03 2.176720e+03 8.142090e+03 4.584880e+03 2.327830e+03 2.615800e+03 | ||||||
|  | 2.592210e+05 2.264710e+05 2.734440e+03 4.182760e+03 6.389550e+03 5.540520e+03 1.958880e+03 2.720120e+03 | ||||||
|  | 2.526500e+05 2.248310e+05 4.163640e+03 2.989990e+03 7.179200e+03 5.213060e+03 1.929550e+03 3.457660e+03 | ||||||
|  | 2.570830e+05 2.220480e+05 5.759040e+03 7.024410e+02 8.566550e+03 3.552020e+03 1.832940e+03 3.956190e+03 | ||||||
|  | 2.631300e+05 2.229670e+05 5.141140e+03 1.166120e+03 8.666960e+03 2.720370e+03 9.713740e+02 3.479730e+03 | ||||||
|  | 2.602360e+05 2.252650e+05 3.425140e+03 3.339080e+03 7.853610e+03 3.674950e+03 5.259080e+02 2.443310e+03 | ||||||
|  | 2.535030e+05 2.245270e+05 4.398130e+03 2.927430e+03 8.110280e+03 4.842470e+03 1.513870e+03 2.467100e+03 | ||||||
|  | 2.561260e+05 2.226930e+05 6.043530e+03 6.562240e+02 8.797560e+03 4.832410e+03 2.832370e+03 3.426140e+03 | ||||||
|  | 2.616770e+05 2.236080e+05 5.830460e+03 1.033910e+03 8.123940e+03 3.980690e+03 1.927960e+03 4.092720e+03 | ||||||
|  | 2.594570e+05 2.255360e+05 4.015570e+03 2.995990e+03 7.135440e+03 3.713550e+03 3.072200e+02 3.849430e+03 | ||||||
|  | 2.533520e+05 2.242160e+05 4.650560e+03 3.196620e+03 8.131280e+03 3.586160e+03 7.083230e+01 3.074180e+03 | ||||||
|  | 2.561240e+05 2.215130e+05 6.100480e+03 8.219800e+02 9.757540e+03 3.474510e+03 1.647520e+03 2.559860e+03 | ||||||
|  | 2.630240e+05 2.215590e+05 5.789960e+03 6.994170e+02 9.129740e+03 4.153080e+03 2.829250e+03 2.677270e+03 | ||||||
|  | 2.617200e+05 2.240150e+05 4.358500e+03 2.645360e+03 7.414110e+03 4.810670e+03 2.225990e+03 3.185990e+03 | ||||||
|  | 2.547560e+05 2.242400e+05 4.857380e+03 3.229680e+03 7.539310e+03 4.769140e+03 1.507130e+03 3.668260e+03 | ||||||
|  | 2.568890e+05 2.226580e+05 6.473420e+03 1.214110e+03 9.010760e+03 3.848730e+03 1.303840e+03 3.778500e+03 | ||||||
|  | 2.642080e+05 2.233160e+05 5.700450e+03 1.116560e+03 9.087610e+03 3.846680e+03 1.293590e+03 2.891560e+03 | ||||||
|  | 2.633100e+05 2.257190e+05 3.936120e+03 3.252360e+03 7.552850e+03 4.897860e+03 1.156630e+03 2.037160e+03 | ||||||
|  | 2.550790e+05 2.250860e+05 4.536450e+03 3.960110e+03 7.454590e+03 5.479070e+03 1.596360e+03 2.190800e+03 | ||||||
|  | 2.544870e+05 2.225080e+05 6.635860e+03 1.758850e+03 8.732970e+03 4.466970e+03 2.650360e+03 3.139310e+03 | ||||||
|  | 2.612410e+05 2.224320e+05 6.702270e+03 1.085130e+03 8.989230e+03 3.112990e+03 1.933560e+03 3.828410e+03 | ||||||
|  | 2.621190e+05 2.255870e+05 4.714950e+03 2.892360e+03 8.107820e+03 2.961310e+03 2.399780e+02 3.273720e+03 | ||||||
|  | 2.549990e+05 2.265140e+05 4.532090e+03 4.126900e+03 8.200130e+03 3.872590e+03 5.608900e+01 2.370580e+03 | ||||||
|  | 2.542890e+05 2.240330e+05 6.538810e+03 2.251440e+03 9.419430e+03 4.564450e+03 2.077810e+03 2.508170e+03 | ||||||
|  | 2.618900e+05 2.219600e+05 6.846090e+03 1.475270e+03 9.125590e+03 4.598290e+03 3.299220e+03 3.475420e+03 | ||||||
|  | 2.645020e+05 2.230850e+05 5.066380e+03 3.270560e+03 7.933170e+03 4.173710e+03 1.908910e+03 3.867460e+03 | ||||||
|  | 2.578890e+05 2.236560e+05 4.201660e+03 4.473640e+03 7.688340e+03 4.161580e+03 6.875790e+02 3.653690e+03 | ||||||
|  | 2.542700e+05 2.231510e+05 5.715140e+03 2.752140e+03 9.273320e+03 3.772950e+03 8.964040e+02 3.256060e+03 | ||||||
|  | 2.582570e+05 2.242170e+05 6.114310e+03 1.856860e+03 9.604320e+03 4.200490e+03 1.764380e+03 2.939220e+03 | ||||||
|  | 2.600200e+05 2.268680e+05 4.237530e+03 3.605880e+03 8.066220e+03 5.430250e+03 2.138580e+03 2.696710e+03 | ||||||
|  | 2.550830e+05 2.259240e+05 3.350310e+03 4.853070e+03 7.045820e+03 5.925200e+03 1.893610e+03 2.897340e+03 | ||||||
|  | 2.544530e+05 2.221270e+05 5.271330e+03 2.491500e+03 8.436680e+03 5.032080e+03 2.436050e+03 3.724590e+03 | ||||||
|  | 2.625880e+05 2.199500e+05 5.994620e+03 7.892740e+02 9.029650e+03 3.515740e+03 1.953570e+03 4.014520e+03 | ||||||
|  | 2.656100e+05 2.233330e+05 4.391410e+03 2.400960e+03 8.146460e+03 3.536960e+03 5.302320e+02 3.133920e+03 | ||||||
|  | 2.574700e+05 2.269770e+05 2.975320e+03 4.633530e+03 7.278560e+03 4.640100e+03 -5.015020e+01 2.024960e+03 | ||||||
|  | 2.506870e+05 2.263310e+05 4.517860e+03 3.183800e+03 8.072600e+03 5.281660e+03 1.605140e+03 2.335140e+03 | ||||||
|  | 2.555630e+05 2.244950e+05 5.551000e+03 1.101300e+03 8.461490e+03 4.725700e+03 2.726670e+03 3.480540e+03 | ||||||
|  | 2.613350e+05 2.246450e+05 4.764680e+03 1.557020e+03 7.833350e+03 3.524810e+03 1.577410e+03 4.038620e+03 | ||||||
|  | 2.602690e+05 2.240080e+05 3.558030e+03 2.987610e+03 7.362440e+03 3.279230e+03 5.624420e+02 3.786550e+03 | ||||||
|  | 2.574350e+05 2.217770e+05 4.972600e+03 2.166880e+03 8.481440e+03 3.328720e+03 1.037130e+03 3.271370e+03 | ||||||
|  | 2.610460e+05 2.215500e+05 5.816180e+03 5.902170e+02 9.120930e+03 3.895400e+03 2.382670e+03 2.824170e+03 | ||||||
|  | 2.627660e+05 2.244730e+05 4.835050e+03 1.785770e+03 7.880760e+03 4.745620e+03 2.443660e+03 3.229550e+03 | ||||||
|  | 2.565090e+05 2.264130e+05 3.758870e+03 3.461200e+03 6.743770e+03 4.928960e+03 1.536620e+03 3.546690e+03 | ||||||
|  | 2.507930e+05 2.243720e+05 5.218490e+03 2.865260e+03 7.803960e+03 4.351090e+03 1.333820e+03 3.680490e+03 | ||||||
|  | 2.563190e+05 2.220660e+05 6.403970e+03 7.323450e+02 9.627760e+03 3.089300e+03 1.516780e+03 3.653690e+03 | ||||||
|  | 2.633430e+05 2.232350e+05 5.200430e+03 1.388580e+03 9.372850e+03 3.371230e+03 1.450390e+03 2.678910e+03 | ||||||
|  | 2.609030e+05 2.251100e+05 3.722580e+03 3.246660e+03 7.876540e+03 4.716810e+03 1.498440e+03 2.116520e+03 | ||||||
|  | 2.544160e+05 2.237690e+05 4.841650e+03 2.956400e+03 8.115920e+03 5.392360e+03 2.142810e+03 2.652320e+03 | ||||||
|  | 2.566980e+05 2.221720e+05 6.471230e+03 9.703960e+02 8.834980e+03 4.816840e+03 2.376630e+03 3.605860e+03 | ||||||
|  | 2.618410e+05 2.235370e+05 5.500740e+03 1.189660e+03 8.365730e+03 4.016470e+03 1.042270e+03 3.821200e+03 | ||||||
|  | 2.595030e+05 2.258400e+05 3.827930e+03 3.088840e+03 7.676140e+03 3.978310e+03 -3.570070e+02 3.016420e+03 | ||||||
|  | 2.534570e+05 2.246360e+05 4.914610e+03 3.097450e+03 8.224900e+03 4.321440e+03 1.713740e+02 2.412360e+03 | ||||||
|  | 2.560290e+05 2.222210e+05 6.841800e+03 1.028500e+03 9.252300e+03 4.387570e+03 2.418140e+03 2.510100e+03 | ||||||
|  | 2.628400e+05 2.225500e+05 6.210250e+03 1.410730e+03 8.538900e+03 4.152580e+03 3.009300e+03 3.219760e+03 | ||||||
|  | 2.616330e+05 2.250650e+05 4.284530e+03 3.357210e+03 7.282170e+03 3.823590e+03 1.402840e+03 3.644670e+03 | ||||||
|  | 2.545910e+05 2.251090e+05 4.693160e+03 3.647740e+03 7.745160e+03 3.686380e+03 4.901610e+02 3.448860e+03 | ||||||
|  | 2.547800e+05 2.235990e+05 6.527380e+03 1.569870e+03 9.438430e+03 3.456580e+03 1.162520e+03 3.252010e+03 | ||||||
|  | 2.606390e+05 2.241070e+05 6.531050e+03 1.633050e+03 9.283720e+03 4.174020e+03 2.089550e+03 2.775750e+03 | ||||||
|  | 2.611080e+05 2.254720e+05 4.968260e+03 3.527850e+03 7.692870e+03 5.137100e+03 2.207390e+03 2.436660e+03 | ||||||
|  | 2.557750e+05 2.237080e+05 4.963450e+03 4.017370e+03 7.701420e+03 5.269650e+03 2.284400e+03 2.842080e+03 | ||||||
|  | 2.573980e+05 2.209470e+05 6.767500e+03 1.645710e+03 9.107070e+03 4.000180e+03 2.548860e+03 3.624770e+03 | ||||||
|  | 2.649240e+05 2.215590e+05 6.471460e+03 1.110330e+03 9.459650e+03 3.108170e+03 1.696970e+03 3.893440e+03 | ||||||
|  | 2.653390e+05 2.257330e+05 4.348800e+03 3.459510e+03 8.475300e+03 4.031240e+03 5.733470e+02 2.910270e+03 | ||||||
|  | 2.568140e+05 2.269950e+05 3.479540e+03 4.949790e+03 7.499910e+03 5.624710e+03 7.516560e+02 2.347710e+03 | ||||||
|  | 2.533160e+05 2.251610e+05 5.147060e+03 3.218430e+03 8.460160e+03 5.869300e+03 2.336320e+03 2.987960e+03 | ||||||
|  | 2.593600e+05 2.231010e+05 5.549120e+03 1.869950e+03 8.740760e+03 4.668940e+03 2.457910e+03 3.758820e+03 | ||||||
|  | 2.620120e+05 2.240160e+05 4.173610e+03 3.004130e+03 8.157040e+03 3.704730e+03 9.879640e+02 3.652750e+03 | ||||||
|  | 2.571760e+05 2.244200e+05 3.517300e+03 4.118750e+03 7.822240e+03 3.718230e+03 3.726490e+01 2.953680e+03 | ||||||
|  | 2.551460e+05 2.233220e+05 4.923980e+03 2.330680e+03 9.095910e+03 3.792400e+03 1.013070e+03 2.711240e+03 | ||||||
|  | 2.605240e+05 2.236510e+05 5.413630e+03 1.146210e+03 8.817170e+03 4.419650e+03 2.446650e+03 2.832050e+03 | ||||||
|  | 2.620980e+05 2.257520e+05 4.262980e+03 2.270970e+03 7.135480e+03 5.067120e+03 2.294680e+03 3.376620e+03 | ||||||
|  | 2.568890e+05 2.253790e+05 3.606460e+03 3.568190e+03 6.552650e+03 4.970270e+03 1.516380e+03 3.662570e+03 | ||||||
|  | 2.539480e+05 2.226310e+05 5.511700e+03 2.066300e+03 7.952660e+03 4.019910e+03 1.513140e+03 3.752630e+03 | ||||||
|  | 2.597990e+05 2.220670e+05 5.873500e+03 6.085840e+02 9.253780e+03 2.870740e+03 1.348240e+03 3.344200e+03 | ||||||
|  | 2.625470e+05 2.249010e+05 4.346080e+03 1.928100e+03 8.590970e+03 3.455460e+03 9.043910e+02 2.379270e+03 | ||||||
|  | 2.561370e+05 2.267610e+05 3.423560e+03 3.379080e+03 7.471150e+03 4.894170e+03 1.153540e+03 2.031410e+03 | ||||||
|  | 2.503260e+05 2.250130e+05 5.519980e+03 2.423970e+03 7.991760e+03 5.117950e+03 2.098790e+03 3.099240e+03 | ||||||
|  | 2.554540e+05 2.229920e+05 6.547950e+03 4.964960e+02 8.751340e+03 3.900560e+03 2.132290e+03 4.076810e+03 | ||||||
|  | 2.612860e+05 2.234890e+05 5.152850e+03 1.501510e+03 8.425610e+03 2.888030e+03 7.761140e+02 3.786360e+03 | ||||||
|  | 2.589690e+05 2.240690e+05 3.832610e+03 3.001980e+03 7.979260e+03 3.182310e+03 5.271600e+01 2.874800e+03 | ||||||
|  | 2.549460e+05 2.220350e+05 5.317880e+03 2.139800e+03 9.103140e+03 3.955610e+03 1.235170e+03 2.394150e+03 | ||||||
|  | 2.586760e+05 2.212050e+05 6.594910e+03 5.053440e+02 9.423360e+03 4.562470e+03 2.913740e+03 2.892350e+03 | ||||||
|  | 2.621250e+05 2.235660e+05 5.116750e+03 1.773600e+03 8.082200e+03 4.776370e+03 2.386390e+03 3.659730e+03 | ||||||
|  | 2.578350e+05 2.259180e+05 3.714300e+03 3.477080e+03 7.205370e+03 4.554610e+03 7.115390e+02 3.878420e+03 | ||||||
|  | 2.536600e+05 2.243710e+05 5.022450e+03 2.592430e+03 8.277200e+03 4.119370e+03 4.865080e+02 3.666740e+03 | ||||||
|  | 2.595030e+05 2.220610e+05 6.589950e+03 6.599360e+02 9.596920e+03 3.598100e+03 1.702490e+03 3.036600e+03 | ||||||
|  | 2.654950e+05 2.228430e+05 5.541850e+03 1.728430e+03 8.459960e+03 4.492000e+03 2.231970e+03 2.430620e+03 | ||||||
|  | 2.609290e+05 2.249960e+05 4.000950e+03 3.745990e+03 6.983790e+03 5.430860e+03 1.855260e+03 2.533380e+03 | ||||||
|  | 2.527160e+05 2.243350e+05 5.086560e+03 3.401150e+03 7.597970e+03 5.196120e+03 1.755720e+03 3.079760e+03 | ||||||
|  | 2.541100e+05 2.231110e+05 6.822190e+03 1.229080e+03 9.164340e+03 3.761230e+03 1.679390e+03 3.584880e+03 | ||||||
|  | 2.599690e+05 2.246930e+05 6.183950e+03 1.538500e+03 9.222080e+03 3.139170e+03 9.499020e+02 3.180800e+03 | ||||||
|  | 2.590780e+05 2.269130e+05 4.388890e+03 3.694820e+03 8.195020e+03 3.933000e+03 4.260800e+02 2.388450e+03 | ||||||
|  | 2.545630e+05 2.247600e+05 5.168440e+03 4.020940e+03 8.450270e+03 4.758910e+03 1.458900e+03 2.286430e+03 | ||||||
|  | 2.580590e+05 2.212170e+05 6.883460e+03 1.649530e+03 9.232780e+03 4.457650e+03 3.057820e+03 3.031950e+03 | ||||||
|  | 2.646670e+05 2.211770e+05 6.218510e+03 1.645730e+03 8.657180e+03 3.663500e+03 2.528280e+03 3.978340e+03 | ||||||
|  | 2.629250e+05 2.243820e+05 4.627500e+03 3.635930e+03 7.892800e+03 3.431320e+03 6.045090e+02 3.901370e+03 | ||||||
|  | 2.547080e+05 2.254480e+05 4.408250e+03 4.461040e+03 8.197170e+03 3.953750e+03 -4.453460e+01 3.154870e+03 | ||||||
|  | 2.537020e+05 2.246350e+05 5.825770e+03 2.577050e+03 9.590050e+03 4.569250e+03 1.460270e+03 2.785170e+03 | ||||||
|  | 2.602060e+05 2.241400e+05 5.387980e+03 1.951160e+03 8.789510e+03 5.131660e+03 2.706380e+03 2.972480e+03 | ||||||
|  | 2.612400e+05 2.247370e+05 3.860810e+03 3.418310e+03 7.414530e+03 5.284520e+03 2.271380e+03 3.183150e+03 | ||||||
|  | 2.561400e+05 2.232520e+05 3.850010e+03 3.957140e+03 7.262650e+03 4.964640e+03 1.499510e+03 3.453130e+03 | ||||||
|  | 2.561160e+05 2.213490e+05 5.594480e+03 2.054400e+03 8.835130e+03 3.662010e+03 1.485510e+03 3.613010e+03 | ||||||
							
								
								
									
										124
									
								
								tests/data/extract-7
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										124
									
								
								tests/data/extract-7
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,124 @@ | |||||||
|  | # path: /newton/prep | ||||||
|  | # layout: float32_8 | ||||||
|  | # start: 1332496830000000 | ||||||
|  | # end: 1332496830999000 | ||||||
|  | 1332496830000000 2.517740e+05 2.242410e+05 5.688100e+03 1.915530e+03 9.329220e+03 4.183710e+03 1.212350e+03 2.641790e+03 | ||||||
|  | 1332496830008333 2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03 | ||||||
|  | 1332496830016667 2.630730e+05 2.233040e+05 4.961640e+03 2.197120e+03 7.687310e+03 4.861860e+03 2.732780e+03 3.008540e+03 | ||||||
|  | 1332496830025000 2.576140e+05 2.233230e+05 5.003660e+03 3.525140e+03 7.165310e+03 4.685620e+03 1.715380e+03 3.440480e+03 | ||||||
|  | 1332496830033333 2.557800e+05 2.219150e+05 6.357310e+03 2.145290e+03 8.426970e+03 3.775350e+03 1.475390e+03 3.797240e+03 | ||||||
|  | 1332496830041667 2.601660e+05 2.230080e+05 6.702590e+03 1.484960e+03 9.288100e+03 3.330830e+03 1.228500e+03 3.214320e+03 | ||||||
|  | 1332496830050000 2.612310e+05 2.264260e+05 4.980060e+03 2.982380e+03 8.499630e+03 4.267670e+03 9.940890e+02 2.292890e+03 | ||||||
|  | 1332496830058333 2.551170e+05 2.266420e+05 4.584410e+03 4.656440e+03 7.860150e+03 5.317310e+03 1.473600e+03 2.111690e+03 | ||||||
|  | 1332496830066667 2.533000e+05 2.235540e+05 6.455090e+03 3.036650e+03 8.869750e+03 4.986310e+03 2.607360e+03 2.839590e+03 | ||||||
|  | 1332496830075000 2.610610e+05 2.212630e+05 6.951980e+03 1.500240e+03 9.386100e+03 3.791680e+03 2.677010e+03 3.980630e+03 | ||||||
|  | 1332496830083333 2.665030e+05 2.231980e+05 5.189610e+03 2.594560e+03 8.571530e+03 3.175000e+03 9.198400e+02 3.792010e+03 | ||||||
|  | 1332496830091667 2.606920e+05 2.251840e+05 3.782480e+03 4.642880e+03 7.662960e+03 3.917790e+03 -2.510970e+02 2.907060e+03 | ||||||
|  | 1332496830100000 2.539630e+05 2.250810e+05 5.123530e+03 3.839550e+03 8.669030e+03 4.877820e+03 9.437240e+02 2.527450e+03 | ||||||
|  | 1332496830108333 2.565550e+05 2.241690e+05 5.930600e+03 2.298540e+03 8.906710e+03 5.331680e+03 2.549910e+03 3.053560e+03 | ||||||
|  | 1332496830116667 2.608890e+05 2.250100e+05 4.681130e+03 2.971870e+03 7.900040e+03 4.874080e+03 2.322430e+03 3.649120e+03 | ||||||
|  | 1332496830125000 2.579440e+05 2.249230e+05 3.291140e+03 4.357090e+03 7.131590e+03 4.385560e+03 1.077050e+03 3.664040e+03 | ||||||
|  | 1332496830133333 2.550090e+05 2.230180e+05 4.584820e+03 2.864000e+03 8.469490e+03 3.625580e+03 9.855570e+02 3.504230e+03 | ||||||
|  | 1332496830141667 2.601140e+05 2.219470e+05 5.676190e+03 1.210340e+03 9.393780e+03 3.390240e+03 1.654020e+03 3.018700e+03 | ||||||
|  | 1332496830150000 2.642770e+05 2.244380e+05 4.446620e+03 2.176720e+03 8.142090e+03 4.584880e+03 2.327830e+03 2.615800e+03 | ||||||
|  | 1332496830158333 2.592210e+05 2.264710e+05 2.734440e+03 4.182760e+03 6.389550e+03 5.540520e+03 1.958880e+03 2.720120e+03 | ||||||
|  | 1332496830166667 2.526500e+05 2.248310e+05 4.163640e+03 2.989990e+03 7.179200e+03 5.213060e+03 1.929550e+03 3.457660e+03 | ||||||
|  | 1332496830175000 2.570830e+05 2.220480e+05 5.759040e+03 7.024410e+02 8.566550e+03 3.552020e+03 1.832940e+03 3.956190e+03 | ||||||
|  | 1332496830183333 2.631300e+05 2.229670e+05 5.141140e+03 1.166120e+03 8.666960e+03 2.720370e+03 9.713740e+02 3.479730e+03 | ||||||
|  | 1332496830191667 2.602360e+05 2.252650e+05 3.425140e+03 3.339080e+03 7.853610e+03 3.674950e+03 5.259080e+02 2.443310e+03 | ||||||
|  | 1332496830200000 2.535030e+05 2.245270e+05 4.398130e+03 2.927430e+03 8.110280e+03 4.842470e+03 1.513870e+03 2.467100e+03 | ||||||
|  | 1332496830208333 2.561260e+05 2.226930e+05 6.043530e+03 6.562240e+02 8.797560e+03 4.832410e+03 2.832370e+03 3.426140e+03 | ||||||
|  | 1332496830216667 2.616770e+05 2.236080e+05 5.830460e+03 1.033910e+03 8.123940e+03 3.980690e+03 1.927960e+03 4.092720e+03 | ||||||
|  | 1332496830225000 2.594570e+05 2.255360e+05 4.015570e+03 2.995990e+03 7.135440e+03 3.713550e+03 3.072200e+02 3.849430e+03 | ||||||
|  | 1332496830233333 2.533520e+05 2.242160e+05 4.650560e+03 3.196620e+03 8.131280e+03 3.586160e+03 7.083230e+01 3.074180e+03 | ||||||
|  | 1332496830241667 2.561240e+05 2.215130e+05 6.100480e+03 8.219800e+02 9.757540e+03 3.474510e+03 1.647520e+03 2.559860e+03 | ||||||
|  | 1332496830250000 2.630240e+05 2.215590e+05 5.789960e+03 6.994170e+02 9.129740e+03 4.153080e+03 2.829250e+03 2.677270e+03 | ||||||
|  | 1332496830258333 2.617200e+05 2.240150e+05 4.358500e+03 2.645360e+03 7.414110e+03 4.810670e+03 2.225990e+03 3.185990e+03 | ||||||
|  | 1332496830266667 2.547560e+05 2.242400e+05 4.857380e+03 3.229680e+03 7.539310e+03 4.769140e+03 1.507130e+03 3.668260e+03 | ||||||
|  | 1332496830275000 2.568890e+05 2.226580e+05 6.473420e+03 1.214110e+03 9.010760e+03 3.848730e+03 1.303840e+03 3.778500e+03 | ||||||
|  | 1332496830283333 2.642080e+05 2.233160e+05 5.700450e+03 1.116560e+03 9.087610e+03 3.846680e+03 1.293590e+03 2.891560e+03 | ||||||
|  | 1332496830291667 2.633100e+05 2.257190e+05 3.936120e+03 3.252360e+03 7.552850e+03 4.897860e+03 1.156630e+03 2.037160e+03 | ||||||
|  | 1332496830300000 2.550790e+05 2.250860e+05 4.536450e+03 3.960110e+03 7.454590e+03 5.479070e+03 1.596360e+03 2.190800e+03 | ||||||
|  | 1332496830308333 2.544870e+05 2.225080e+05 6.635860e+03 1.758850e+03 8.732970e+03 4.466970e+03 2.650360e+03 3.139310e+03 | ||||||
|  | 1332496830316667 2.612410e+05 2.224320e+05 6.702270e+03 1.085130e+03 8.989230e+03 3.112990e+03 1.933560e+03 3.828410e+03 | ||||||
|  | 1332496830325000 2.621190e+05 2.255870e+05 4.714950e+03 2.892360e+03 8.107820e+03 2.961310e+03 2.399780e+02 3.273720e+03 | ||||||
|  | 1332496830333333 2.549990e+05 2.265140e+05 4.532090e+03 4.126900e+03 8.200130e+03 3.872590e+03 5.608900e+01 2.370580e+03 | ||||||
|  | 1332496830341667 2.542890e+05 2.240330e+05 6.538810e+03 2.251440e+03 9.419430e+03 4.564450e+03 2.077810e+03 2.508170e+03 | ||||||
|  | 1332496830350000 2.618900e+05 2.219600e+05 6.846090e+03 1.475270e+03 9.125590e+03 4.598290e+03 3.299220e+03 3.475420e+03 | ||||||
|  | 1332496830358333 2.645020e+05 2.230850e+05 5.066380e+03 3.270560e+03 7.933170e+03 4.173710e+03 1.908910e+03 3.867460e+03 | ||||||
|  | 1332496830366667 2.578890e+05 2.236560e+05 4.201660e+03 4.473640e+03 7.688340e+03 4.161580e+03 6.875790e+02 3.653690e+03 | ||||||
|  | 1332496830375000 2.542700e+05 2.231510e+05 5.715140e+03 2.752140e+03 9.273320e+03 3.772950e+03 8.964040e+02 3.256060e+03 | ||||||
|  | 1332496830383333 2.582570e+05 2.242170e+05 6.114310e+03 1.856860e+03 9.604320e+03 4.200490e+03 1.764380e+03 2.939220e+03 | ||||||
|  | 1332496830391667 2.600200e+05 2.268680e+05 4.237530e+03 3.605880e+03 8.066220e+03 5.430250e+03 2.138580e+03 2.696710e+03 | ||||||
|  | 1332496830400000 2.550830e+05 2.259240e+05 3.350310e+03 4.853070e+03 7.045820e+03 5.925200e+03 1.893610e+03 2.897340e+03 | ||||||
|  | 1332496830408333 2.544530e+05 2.221270e+05 5.271330e+03 2.491500e+03 8.436680e+03 5.032080e+03 2.436050e+03 3.724590e+03 | ||||||
|  | 1332496830416667 2.625880e+05 2.199500e+05 5.994620e+03 7.892740e+02 9.029650e+03 3.515740e+03 1.953570e+03 4.014520e+03 | ||||||
|  | 1332496830425000 2.656100e+05 2.233330e+05 4.391410e+03 2.400960e+03 8.146460e+03 3.536960e+03 5.302320e+02 3.133920e+03 | ||||||
|  | 1332496830433333 2.574700e+05 2.269770e+05 2.975320e+03 4.633530e+03 7.278560e+03 4.640100e+03 -5.015020e+01 2.024960e+03 | ||||||
|  | 1332496830441667 2.506870e+05 2.263310e+05 4.517860e+03 3.183800e+03 8.072600e+03 5.281660e+03 1.605140e+03 2.335140e+03 | ||||||
|  | 1332496830450000 2.555630e+05 2.244950e+05 5.551000e+03 1.101300e+03 8.461490e+03 4.725700e+03 2.726670e+03 3.480540e+03 | ||||||
|  | 1332496830458333 2.613350e+05 2.246450e+05 4.764680e+03 1.557020e+03 7.833350e+03 3.524810e+03 1.577410e+03 4.038620e+03 | ||||||
|  | 1332496830466667 2.602690e+05 2.240080e+05 3.558030e+03 2.987610e+03 7.362440e+03 3.279230e+03 5.624420e+02 3.786550e+03 | ||||||
|  | 1332496830475000 2.574350e+05 2.217770e+05 4.972600e+03 2.166880e+03 8.481440e+03 3.328720e+03 1.037130e+03 3.271370e+03 | ||||||
|  | 1332496830483333 2.610460e+05 2.215500e+05 5.816180e+03 5.902170e+02 9.120930e+03 3.895400e+03 2.382670e+03 2.824170e+03 | ||||||
|  | 1332496830491667 2.627660e+05 2.244730e+05 4.835050e+03 1.785770e+03 7.880760e+03 4.745620e+03 2.443660e+03 3.229550e+03 | ||||||
|  | 1332496830500000 2.565090e+05 2.264130e+05 3.758870e+03 3.461200e+03 6.743770e+03 4.928960e+03 1.536620e+03 3.546690e+03 | ||||||
|  | 1332496830508333 2.507930e+05 2.243720e+05 5.218490e+03 2.865260e+03 7.803960e+03 4.351090e+03 1.333820e+03 3.680490e+03 | ||||||
|  | 1332496830516667 2.563190e+05 2.220660e+05 6.403970e+03 7.323450e+02 9.627760e+03 3.089300e+03 1.516780e+03 3.653690e+03 | ||||||
|  | 1332496830525000 2.633430e+05 2.232350e+05 5.200430e+03 1.388580e+03 9.372850e+03 3.371230e+03 1.450390e+03 2.678910e+03 | ||||||
|  | 1332496830533333 2.609030e+05 2.251100e+05 3.722580e+03 3.246660e+03 7.876540e+03 4.716810e+03 1.498440e+03 2.116520e+03 | ||||||
|  | 1332496830541667 2.544160e+05 2.237690e+05 4.841650e+03 2.956400e+03 8.115920e+03 5.392360e+03 2.142810e+03 2.652320e+03 | ||||||
|  | 1332496830550000 2.566980e+05 2.221720e+05 6.471230e+03 9.703960e+02 8.834980e+03 4.816840e+03 2.376630e+03 3.605860e+03 | ||||||
|  | 1332496830558333 2.618410e+05 2.235370e+05 5.500740e+03 1.189660e+03 8.365730e+03 4.016470e+03 1.042270e+03 3.821200e+03 | ||||||
|  | 1332496830566667 2.595030e+05 2.258400e+05 3.827930e+03 3.088840e+03 7.676140e+03 3.978310e+03 -3.570070e+02 3.016420e+03 | ||||||
|  | 1332496830575000 2.534570e+05 2.246360e+05 4.914610e+03 3.097450e+03 8.224900e+03 4.321440e+03 1.713740e+02 2.412360e+03 | ||||||
|  | 1332496830583333 2.560290e+05 2.222210e+05 6.841800e+03 1.028500e+03 9.252300e+03 4.387570e+03 2.418140e+03 2.510100e+03 | ||||||
|  | 1332496830591667 2.628400e+05 2.225500e+05 6.210250e+03 1.410730e+03 8.538900e+03 4.152580e+03 3.009300e+03 3.219760e+03 | ||||||
|  | 1332496830600000 2.616330e+05 2.250650e+05 4.284530e+03 3.357210e+03 7.282170e+03 3.823590e+03 1.402840e+03 3.644670e+03 | ||||||
|  | 1332496830608333 2.545910e+05 2.251090e+05 4.693160e+03 3.647740e+03 7.745160e+03 3.686380e+03 4.901610e+02 3.448860e+03 | ||||||
|  | 1332496830616667 2.547800e+05 2.235990e+05 6.527380e+03 1.569870e+03 9.438430e+03 3.456580e+03 1.162520e+03 3.252010e+03 | ||||||
|  | 1332496830625000 2.606390e+05 2.241070e+05 6.531050e+03 1.633050e+03 9.283720e+03 4.174020e+03 2.089550e+03 2.775750e+03 | ||||||
|  | 1332496830633333 2.611080e+05 2.254720e+05 4.968260e+03 3.527850e+03 7.692870e+03 5.137100e+03 2.207390e+03 2.436660e+03 | ||||||
|  | 1332496830641667 2.557750e+05 2.237080e+05 4.963450e+03 4.017370e+03 7.701420e+03 5.269650e+03 2.284400e+03 2.842080e+03 | ||||||
|  | 1332496830650000 2.573980e+05 2.209470e+05 6.767500e+03 1.645710e+03 9.107070e+03 4.000180e+03 2.548860e+03 3.624770e+03 | ||||||
|  | 1332496830658333 2.649240e+05 2.215590e+05 6.471460e+03 1.110330e+03 9.459650e+03 3.108170e+03 1.696970e+03 3.893440e+03 | ||||||
|  | 1332496830666667 2.653390e+05 2.257330e+05 4.348800e+03 3.459510e+03 8.475300e+03 4.031240e+03 5.733470e+02 2.910270e+03 | ||||||
|  | 1332496830675000 2.568140e+05 2.269950e+05 3.479540e+03 4.949790e+03 7.499910e+03 5.624710e+03 7.516560e+02 2.347710e+03 | ||||||
|  | 1332496830683333 2.533160e+05 2.251610e+05 5.147060e+03 3.218430e+03 8.460160e+03 5.869300e+03 2.336320e+03 2.987960e+03 | ||||||
|  | 1332496830691667 2.593600e+05 2.231010e+05 5.549120e+03 1.869950e+03 8.740760e+03 4.668940e+03 2.457910e+03 3.758820e+03 | ||||||
|  | 1332496830700000 2.620120e+05 2.240160e+05 4.173610e+03 3.004130e+03 8.157040e+03 3.704730e+03 9.879640e+02 3.652750e+03 | ||||||
|  | 1332496830708333 2.571760e+05 2.244200e+05 3.517300e+03 4.118750e+03 7.822240e+03 3.718230e+03 3.726490e+01 2.953680e+03 | ||||||
|  | 1332496830716667 2.551460e+05 2.233220e+05 4.923980e+03 2.330680e+03 9.095910e+03 3.792400e+03 1.013070e+03 2.711240e+03 | ||||||
|  | 1332496830725000 2.605240e+05 2.236510e+05 5.413630e+03 1.146210e+03 8.817170e+03 4.419650e+03 2.446650e+03 2.832050e+03 | ||||||
|  | 1332496830733333 2.620980e+05 2.257520e+05 4.262980e+03 2.270970e+03 7.135480e+03 5.067120e+03 2.294680e+03 3.376620e+03 | ||||||
|  | 1332496830741667 2.568890e+05 2.253790e+05 3.606460e+03 3.568190e+03 6.552650e+03 4.970270e+03 1.516380e+03 3.662570e+03 | ||||||
|  | 1332496830750000 2.539480e+05 2.226310e+05 5.511700e+03 2.066300e+03 7.952660e+03 4.019910e+03 1.513140e+03 3.752630e+03 | ||||||
|  | 1332496830758333 2.597990e+05 2.220670e+05 5.873500e+03 6.085840e+02 9.253780e+03 2.870740e+03 1.348240e+03 3.344200e+03 | ||||||
|  | 1332496830766667 2.625470e+05 2.249010e+05 4.346080e+03 1.928100e+03 8.590970e+03 3.455460e+03 9.043910e+02 2.379270e+03 | ||||||
|  | 1332496830775000 2.561370e+05 2.267610e+05 3.423560e+03 3.379080e+03 7.471150e+03 4.894170e+03 1.153540e+03 2.031410e+03 | ||||||
|  | 1332496830783333 2.503260e+05 2.250130e+05 5.519980e+03 2.423970e+03 7.991760e+03 5.117950e+03 2.098790e+03 3.099240e+03 | ||||||
|  | 1332496830791667 2.554540e+05 2.229920e+05 6.547950e+03 4.964960e+02 8.751340e+03 3.900560e+03 2.132290e+03 4.076810e+03 | ||||||
|  | 1332496830800000 2.612860e+05 2.234890e+05 5.152850e+03 1.501510e+03 8.425610e+03 2.888030e+03 7.761140e+02 3.786360e+03 | ||||||
|  | 1332496830808333 2.589690e+05 2.240690e+05 3.832610e+03 3.001980e+03 7.979260e+03 3.182310e+03 5.271600e+01 2.874800e+03 | ||||||
|  | 1332496830816667 2.549460e+05 2.220350e+05 5.317880e+03 2.139800e+03 9.103140e+03 3.955610e+03 1.235170e+03 2.394150e+03 | ||||||
|  | 1332496830825000 2.586760e+05 2.212050e+05 6.594910e+03 5.053440e+02 9.423360e+03 4.562470e+03 2.913740e+03 2.892350e+03 | ||||||
|  | 1332496830833333 2.621250e+05 2.235660e+05 5.116750e+03 1.773600e+03 8.082200e+03 4.776370e+03 2.386390e+03 3.659730e+03 | ||||||
|  | 1332496830841667 2.578350e+05 2.259180e+05 3.714300e+03 3.477080e+03 7.205370e+03 4.554610e+03 7.115390e+02 3.878420e+03 | ||||||
|  | 1332496830850000 2.536600e+05 2.243710e+05 5.022450e+03 2.592430e+03 8.277200e+03 4.119370e+03 4.865080e+02 3.666740e+03 | ||||||
|  | 1332496830858333 2.595030e+05 2.220610e+05 6.589950e+03 6.599360e+02 9.596920e+03 3.598100e+03 1.702490e+03 3.036600e+03 | ||||||
|  | 1332496830866667 2.654950e+05 2.228430e+05 5.541850e+03 1.728430e+03 8.459960e+03 4.492000e+03 2.231970e+03 2.430620e+03 | ||||||
|  | 1332496830875000 2.609290e+05 2.249960e+05 4.000950e+03 3.745990e+03 6.983790e+03 5.430860e+03 1.855260e+03 2.533380e+03 | ||||||
|  | 1332496830883333 2.527160e+05 2.243350e+05 5.086560e+03 3.401150e+03 7.597970e+03 5.196120e+03 1.755720e+03 3.079760e+03 | ||||||
|  | 1332496830891667 2.541100e+05 2.231110e+05 6.822190e+03 1.229080e+03 9.164340e+03 3.761230e+03 1.679390e+03 3.584880e+03 | ||||||
|  | 1332496830900000 2.599690e+05 2.246930e+05 6.183950e+03 1.538500e+03 9.222080e+03 3.139170e+03 9.499020e+02 3.180800e+03 | ||||||
|  | 1332496830908333 2.590780e+05 2.269130e+05 4.388890e+03 3.694820e+03 8.195020e+03 3.933000e+03 4.260800e+02 2.388450e+03 | ||||||
|  | 1332496830916667 2.545630e+05 2.247600e+05 5.168440e+03 4.020940e+03 8.450270e+03 4.758910e+03 1.458900e+03 2.286430e+03 | ||||||
|  | 1332496830925000 2.580590e+05 2.212170e+05 6.883460e+03 1.649530e+03 9.232780e+03 4.457650e+03 3.057820e+03 3.031950e+03 | ||||||
|  | 1332496830933333 2.646670e+05 2.211770e+05 6.218510e+03 1.645730e+03 8.657180e+03 3.663500e+03 2.528280e+03 3.978340e+03 | ||||||
|  | 1332496830941667 2.629250e+05 2.243820e+05 4.627500e+03 3.635930e+03 7.892800e+03 3.431320e+03 6.045090e+02 3.901370e+03 | ||||||
|  | 1332496830950000 2.547080e+05 2.254480e+05 4.408250e+03 4.461040e+03 8.197170e+03 3.953750e+03 -4.453460e+01 3.154870e+03 | ||||||
|  | 1332496830958333 2.537020e+05 2.246350e+05 5.825770e+03 2.577050e+03 9.590050e+03 4.569250e+03 1.460270e+03 2.785170e+03 | ||||||
|  | 1332496830966667 2.602060e+05 2.241400e+05 5.387980e+03 1.951160e+03 8.789510e+03 5.131660e+03 2.706380e+03 2.972480e+03 | ||||||
|  | 1332496830975000 2.612400e+05 2.247370e+05 3.860810e+03 3.418310e+03 7.414530e+03 5.284520e+03 2.271380e+03 3.183150e+03 | ||||||
|  | 1332496830983333 2.561400e+05 2.232520e+05 3.850010e+03 3.957140e+03 7.262650e+03 4.964640e+03 1.499510e+03 3.453130e+03 | ||||||
|  | 1332496830991667 2.561160e+05 2.213490e+05 5.594480e+03 2.054400e+03 8.835130e+03 3.662010e+03 1.485510e+03 3.613010e+03 | ||||||
							
								
								
									
										28
									
								
								tests/data/extract-8
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								tests/data/extract-8
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,28 @@ | |||||||
|  | # interval-start 1332496919900000 | ||||||
|  | 1332496919900000 2.523050e+05 2.254020e+05 4.779410e+03 3.638030e+03 8.138070e+03 4.334460e+03 1.083780e+03 3.743730e+03 | ||||||
|  | 1332496919908333 2.551190e+05 2.237870e+05 5.965640e+03 2.076350e+03 9.468790e+03 3.693880e+03 1.247860e+03 3.393680e+03 | ||||||
|  | 1332496919916667 2.616370e+05 2.247980e+05 4.848970e+03 2.315620e+03 9.323300e+03 4.225460e+03 1.805780e+03 2.593050e+03 | ||||||
|  | 1332496919925000 2.606460e+05 2.251300e+05 3.061360e+03 3.951840e+03 7.662910e+03 5.341410e+03 1.986520e+03 2.276780e+03 | ||||||
|  | 1332496919933333 2.559710e+05 2.235030e+05 4.096030e+03 3.296970e+03 7.827080e+03 5.452120e+03 2.492520e+03 2.929450e+03 | ||||||
|  | 1332496919941667 2.579260e+05 2.217080e+05 5.472320e+03 1.555700e+03 8.495760e+03 4.491140e+03 2.379780e+03 3.741710e+03 | ||||||
|  | 1332496919950000 2.610180e+05 2.242350e+05 4.669770e+03 1.876190e+03 8.366680e+03 3.677510e+03 9.021690e+02 3.549040e+03 | ||||||
|  | 1332496919958333 2.569150e+05 2.274650e+05 2.785070e+03 3.751930e+03 7.440320e+03 3.964860e+03 -3.227860e+02 2.460890e+03 | ||||||
|  | 1332496919966667 2.509510e+05 2.262000e+05 3.772710e+03 3.131950e+03 8.159860e+03 4.539860e+03 7.375190e+02 2.126750e+03 | ||||||
|  | 1332496919975000 2.556710e+05 2.223720e+05 5.826200e+03 8.715560e+02 9.120240e+03 4.545110e+03 2.804310e+03 2.721000e+03 | ||||||
|  | 1332496919983333 2.649730e+05 2.214860e+05 5.839130e+03 4.659180e+02 8.628300e+03 3.934870e+03 2.972490e+03 3.773730e+03 | ||||||
|  | 1332496919991667 2.652170e+05 2.233920e+05 3.718770e+03 2.834970e+03 7.209900e+03 3.460260e+03 1.324930e+03 4.075960e+03 | ||||||
|  | # interval-end 1332496919991668 | ||||||
|  | # interval-start 1332496920000000 | ||||||
|  | 1332496920000000 2.564370e+05 2.244300e+05 4.011610e+03 3.475340e+03 7.495890e+03 3.388940e+03 2.613970e+02 3.731260e+03 | ||||||
|  | 1332496920008333 2.539630e+05 2.241670e+05 5.621070e+03 1.548010e+03 9.165170e+03 3.522930e+03 1.058930e+03 2.996960e+03 | ||||||
|  | 1332496920016667 2.585080e+05 2.249300e+05 6.011400e+03 8.188660e+02 9.039950e+03 4.482440e+03 2.490390e+03 2.679340e+03 | ||||||
|  | 1332496920025000 2.596270e+05 2.260220e+05 4.474500e+03 2.423020e+03 7.414190e+03 5.071970e+03 2.439380e+03 2.962960e+03 | ||||||
|  | 1332496920033333 2.551870e+05 2.246320e+05 4.738570e+03 3.398040e+03 7.395120e+03 4.726450e+03 1.839030e+03 3.393530e+03 | ||||||
|  | 1332496920041667 2.571020e+05 2.216230e+05 6.144130e+03 1.441090e+03 8.756480e+03 3.495320e+03 1.869940e+03 3.752530e+03 | ||||||
|  | 1332496920050000 2.636530e+05 2.217700e+05 6.221770e+03 7.389620e+02 9.547600e+03 2.666820e+03 1.462660e+03 3.332570e+03 | ||||||
|  | 1332496920058333 2.636130e+05 2.252560e+05 4.477120e+03 2.437450e+03 8.510210e+03 3.855630e+03 9.594420e+02 2.387180e+03 | ||||||
|  | 1332496920066667 2.553500e+05 2.262640e+05 4.283720e+03 3.923940e+03 7.912470e+03 5.466520e+03 1.284990e+03 2.093720e+03 | ||||||
|  | 1332496920075000 2.527270e+05 2.246090e+05 5.851930e+03 2.491980e+03 8.540630e+03 5.623050e+03 2.339780e+03 3.007140e+03 | ||||||
|  | 1332496920083333 2.584750e+05 2.235780e+05 5.924870e+03 1.394480e+03 8.779620e+03 4.544180e+03 2.132030e+03 3.849760e+03 | ||||||
|  | 1332496920091667 2.615630e+05 2.246090e+05 4.336140e+03 2.455750e+03 8.055380e+03 3.469110e+03 6.278730e+02 3.664200e+03 | ||||||
|  | # interval-end 1332496920100000 | ||||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user