You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

198 lines
7.9 KiB

  1. #!/usr/bin/env python3
  2. # Spectral envelope preprocessor.
  3. # Requires two streams as input: the original raw data, and sinefit data.
  4. from nilmdb.utils.printf import printf
  5. from nilmdb.utils.time import timestamp_to_human
  6. import nilmtools.filter
  7. import nilmdb.client
  8. from numpy import pi, zeros, r_, e, real, imag
  9. import scipy.fftpack
  10. import scipy.signal
  11. import bisect
  12. from nilmdb.utils.interval import Interval
  13. def main(argv=None):
  14. # Set up argument parser
  15. f = nilmtools.filter.Filter()
  16. parser = f.setup_parser("Spectral Envelope Preprocessor", skip_paths=True)
  17. group = parser.add_argument_group("Prep options")
  18. group.add_argument("-c", "--column", action="store", type=int,
  19. help="Column number (first data column is 1)")
  20. group.add_argument("-n", "--nharm", action="store", type=int, default=4,
  21. help="number of odd harmonics to compute (default 4)")
  22. group.add_argument("-N", "--nshift", action="store", type=int, default=1,
  23. help="number of shifted FFTs per period (default 1)")
  24. exc = group.add_mutually_exclusive_group()
  25. exc.add_argument("-r", "--rotate", action="store", type=float,
  26. help="rotate FFT output by this many degrees (default 0)")
  27. exc.add_argument("-R", "--rotate-rad", action="store", type=float,
  28. help="rotate FFT output by this many radians (default 0)")
  29. group.add_argument("srcpath", action="store",
  30. help="Path of raw input, e.g. /foo/raw")
  31. group.add_argument("sinepath", action="store",
  32. help="Path of sinefit input, e.g. /foo/sinefit")
  33. group.add_argument("destpath", action="store",
  34. help="Path of prep output, e.g. /foo/prep")
  35. # Parse arguments
  36. try:
  37. args = f.parse_args(argv)
  38. except nilmtools.filter.MissingDestination as e:
  39. rec = "float32_%d" % (e.parsed_args.nharm * 2)
  40. print("Source is %s (%s)" % (e.src.path, e.src.layout))
  41. print("Destination %s doesn't exist" % (e.dest.path))
  42. print("You could make it with a command like:")
  43. print(" nilmtool -u %s create %s %s" % (e.dest.url, e.dest.path, rec))
  44. raise SystemExit(1)
  45. # Check arguments
  46. if args.column is None or args.column < 1:
  47. parser.error("need a column number >= 1")
  48. if args.nharm < 1 or args.nharm > 32:
  49. parser.error("number of odd harmonics must be 1-32")
  50. if args.nshift < 1:
  51. parser.error("number of shifted FFTs must be >= 1")
  52. if args.rotate is not None:
  53. rotation = args.rotate * 2.0 * pi / 360.0
  54. else:
  55. rotation = args.rotate_rad or 0.0
  56. if f.dest.layout_count != args.nharm * 2:
  57. print("error: need", args.nharm*2, "columns in destination stream")
  58. raise SystemExit(1)
  59. # Check the sine fit stream
  60. client_sinefit = nilmdb.client.Client(args.url)
  61. sinefit = nilmtools.filter.get_stream_info(client_sinefit, args.sinepath)
  62. if not sinefit:
  63. raise Exception("sinefit data not found")
  64. if sinefit.layout != "float32_3":
  65. raise Exception("sinefit data type is " + sinefit.layout
  66. + "; expected float32_3")
  67. # Check and set metadata in prep stream
  68. f.check_dest_metadata({"prep_raw_source": f.src.path,
  69. "prep_sinefit_source": sinefit.path,
  70. "prep_column": args.column,
  71. "prep_rotation": repr(rotation),
  72. "prep_nshift": args.nshift})
  73. # Find the intersection of the usual set of intervals we'd filter,
  74. # and the intervals actually present in sinefit data. This is
  75. # what we will process.
  76. filter_int = f.intervals()
  77. sinefit_int = (Interval(start, end) for (start, end) in
  78. client_sinefit.stream_intervals(
  79. args.sinepath, start=f.start, end=f.end))
  80. intervals = nilmdb.utils.interval.intersection(filter_int, sinefit_int)
  81. # Run the process (using the helper in the filter module)
  82. f.process_numpy(process, args=(client_sinefit, sinefit.path, args.column,
  83. args.nharm, rotation, args.nshift),
  84. intervals=intervals)
  85. def process(data, interval, args, insert_function, final):
  86. (client, sinefit_path, column, nharm, rotation, nshift) = args
  87. rows = data.shape[0]
  88. data_timestamps = data[:, 0]
  89. if rows < 2:
  90. return 0
  91. last_inserted = [nilmdb.utils.time.min_timestamp]
  92. def insert_if_nonoverlapping(data):
  93. """Call insert_function to insert data, but only if this
  94. data doesn't overlap with other data that we inserted."""
  95. if data[0][0] <= last_inserted[0]: # pragma: no cover
  96. # Getting coverage here is hard -- not sure exactly when
  97. # it gets triggered or why this was added; probably some
  98. # unlikely edge condition with timestamp rounding or something.
  99. return
  100. last_inserted[0] = data[-1][0]
  101. insert_function(data)
  102. processed = 0
  103. out = zeros((1, nharm * 2 + 1))
  104. # Pull out sinefit data for the entire time range of this block
  105. for sinefit_line in client.stream_extract(sinefit_path,
  106. data[0, 0], data[rows-1, 0]):
  107. def prep_period(t_min, t_max, rot):
  108. """
  109. Compute prep coefficients from time t_min to t_max, which
  110. are the timestamps of the start and end of one period.
  111. Results are rotated by an additional extra_rot before
  112. being inserted into the database. Returns the maximum
  113. index processed, or None if the period couldn't be
  114. processed.
  115. """
  116. # Find the indices of data that correspond to (t_min, t_max)
  117. idx_min = bisect.bisect_left(data_timestamps, t_min)
  118. idx_max = bisect.bisect_left(data_timestamps, t_max)
  119. if idx_min >= idx_max or idx_max >= len(data_timestamps):
  120. return None
  121. # Perform FFT over those indices
  122. N = idx_max - idx_min
  123. d = data[idx_min:idx_max, column]
  124. F = scipy.fftpack.fft(d) * 2.0 / N
  125. # If we wanted more harmonics than the FFT gave us, pad with zeros
  126. if N < (nharm * 2):
  127. F = r_[F, zeros(nharm * 2 - N)]
  128. # Fill output data.
  129. out[0, 0] = round(t_min)
  130. for k in range(nharm):
  131. Fk = F[2 * k + 1] * e**(rot * 1j * (k+1))
  132. out[0, 2 * k + 1] = -imag(Fk) # Pk
  133. out[0, 2 * k + 2] = real(Fk) # Qk
  134. insert_if_nonoverlapping(out)
  135. return idx_max
  136. # Extract sinefit data to get zero crossing timestamps.
  137. # t_min = beginning of period
  138. # t_max = end of period
  139. (t_min, f0, A, C) = [float(x) for x in sinefit_line.split()]
  140. t_max = t_min + 1e6 / f0
  141. # Compute prep over shifted windows of the period
  142. # (nshift is typically 1)
  143. for n in range(nshift):
  144. # Compute timestamps and rotations for shifted window
  145. time_shift = n * (t_max - t_min) / nshift
  146. shifted_min = t_min + time_shift
  147. shifted_max = t_max + time_shift
  148. angle_shift = n * 2 * pi / nshift
  149. shifted_rot = rotation - angle_shift
  150. # Run prep computation
  151. idx_max = prep_period(shifted_min, shifted_max, shifted_rot)
  152. if not idx_max:
  153. break
  154. processed = idx_max
  155. # If we processed no data but there's lots in here, pretend we
  156. # processed half of it.
  157. if processed == 0 and rows > 10000:
  158. processed = rows // 2
  159. printf("%s: warning: no periods found; skipping %d rows\n",
  160. timestamp_to_human(data[0][0]), processed)
  161. else:
  162. printf("%s: processed %d of %d rows\n",
  163. timestamp_to_human(data[0][0]), processed, rows)
  164. return processed
  165. if __name__ == "__main__":
  166. main()