Profiling --------- .. contents:: Table of Contents :local: The Python community has written some excellent profiling tools. This page includes some suggestions about using them to profile your code. Installing ``line_profile`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Using easy_install:: $ easy_install --user line_profile Using ``line_profile`` ~~~~~~~~~~~~~~~~~~~~~~ Begin by instantiating a profiler:: >>> import line_profiler >>> ip = line_profiler.LineProfiler() You must instruct this profiler to collect statistics on particular functions:: >>> import pydfmux >>> ip.add_function(pydfmux.algorithms.measurement_algs.take_netanal.take_netanal) >>> import dfmux_mezzanine_qc >>> results = lp.run('dfmux_mezzanine_qc.main()') Your profiling results are now available via the ``results`` object:: >>> results.print_stats() This call produces a large amount of output, including the following:: File: /home/gsmecher/winterland/pydfmux/algorithms/measurement_algs/take_netanal.py Function: sweep_netanal at line 185 Line # Hits Time Per Hit % Time Line Contents ============================================================== 185 def sweep_netanal(rmod, freqmin, freqmax, npts, nsamps, 186 amp, target, logger=False): 187 """The worker function of the take_netanal.py module. 188 189 This function actually sets the frequencies and amplitudes, records the 190 data, calculates the phase and magnitude information, and outputs this to a 191 dictionary. It is hidden from tab-completion as it should not be called on 192 its own. Use the take_netanal function instead. 193 194 Args: 195 rmod: A ReadoutModule hardware-map object with which to take a network 196 analysis. 197 198 freqmin: The starting frequency of the scan (in Hz). Default: 2e5 199 200 freqmax: The ending frequency of the scan (in Hz). Default: 5e6 201 202 npts: The number of points to take in the scan. NOTE -- this will be 203 rounded to the next multiple of the number of channels being used 204 (typically 64, but the code queries the board to ask what 205 mux-factor it can do). Default: 500 206 207 nsamps: The number of samples to take at each frequency (how long to 208 integrate). Default: 128 209 210 amp: Amplitude in Normalized units for each frequency to use. Remember 211 that in general there will be 64 such frequencies simultaneously, 212 so divide up that dynamic range accordingly. Default: 0.01 213 214 target: The DAC to use in the scan. Options are 'nuller' and 'carrier' 215 (also accessible as dfmux attributes as d.TARGET.NULLER, d.TARGET.CARRIER) 216 217 logger: Standard logging object being used. 218 """ 219 8 79 9.9 0.0 d = rmod.iceboard 220 8 227 28.4 0.0 rail_monitor = RailMonitor(rmod, logger=logger) # This is a class instantiation of the rail monitor 221 8 18 2.2 0.0 if not logger: 222 logger = _LM.get_child_logger(target=rmod, extra_field_dict={}) 223 logger.info('STARTING Algorithm') 224 225 8 30 3.8 0.0 if target == d.TARGET.NULLER: 226 4 462 115.5 0.0 logger.info('Taking nuller network analysis.') 227 4 19 4.8 0.0 internal_target = d.ROUTING.NULLER 228 else: 229 4 612 153.0 0.0 logger.info('Taking carrier network analysis.') 230 4 63 15.8 0.0 internal_target = d.ROUTING.CARRIER 231 232 8 218829 27353.6 0.1 chanlist = rmod.channels.channel 233 234 8 24 3.0 0.0 if npts % len(chanlist): 235 8 18 2.2 0.0 npts += len(chanlist) - npts % len(chanlist) 236 8 1056 132.0 0.0 logger.info("Selected number of points doesn't evenly divide the available channels. Rounding up to {0} points".format(npts)) 237 238 8 420 52.5 0.0 freqlist = np.linspace(freqmin, freqmax, npts) 239 8 170 21.2 0.0 freqlist = np.around(freqlist, decimals=1) # Just making the output easier 240 8 20 2.5 0.0 niters = npts / len(chanlist) 241 8 14 1.8 0.0 maglist = [] 242 8 12 1.5 0.0 phaselist = [] 243 8 11 1.4 0.0 noiselist = [] 244 8 12 1.5 0.0 vdemodlist = [] 245 8 12 1.5 0.0 amps_raw = [] 246 8 12 1.5 0.0 amps_cal = [] 247 8 12 1.5 0.0 overloadcountlist = [] 248 249 8 4520884 565110.5 1.6 rmod.channels.set_amplitude(amp, d.UNITS.NORMALIZED, target) 250 251 72 189 2.6 0.0 for iiter in range(niters): 252 64 8432 131.8 0.0 logger.info('Taking frequency set {0} of {1}'.format(iiter + 1, niters)) 253 254 # Resetting the DMFD routing, for safety 255 64 865 13.5 0.0 rmod.set_dmfd_routing(target=d.ROUTING.ADC, target_module=rmod.module, 256 64 389958 6093.1 0.1 target_mezzanine=rmod.mezzanine.mezzanine) 257 4160 321058 77.2 0.1 for channel in rmod.channels: 258 4096 32020 7.8 0.0 channel.set_frequency(freqlist[(iiter * 64) + channel.channel - 1], 259 4096 21271228 5193.2 7.7 d.UNITS.HZ, d.TARGET.DEMOD) 260 4096 33903 8.3 0.0 channel.set_frequency(freqlist[(iiter * 64) + channel.channel - 1], 261 4096 21723862 5303.7 7.9 d.UNITS.HZ, target) 262 263 64 5116163 79940.0 1.9 rail_monitor.check_DAC_rail(target=target) 264 64 4448112 69501.8 1.6 rail_monitor.check_ADC_rail() 265 64 9319 145.6 0.0 logger.debug('Getting Samples') 266 64 87460093 1366564.0 31.7 samples = rmod.get_samples(num_samples=nsamps) 267 64 8825 137.9 0.0 logger.debug('Getting Calibration Rerouted Samples') 268 64 750 11.7 0.0 rmod.set_dmfd_routing(target=internal_target, target_module=rmod.module, 269 64 605957 9468.1 0.2 target_mezzanine=rmod.mezzanine.mezzanine) 270 64 4263775 66621.5 1.5 rail_monitor.check_ADC_rail() 271 64 88379403 1380928.2 32.0 cal_samples = rmod.get_samples(num_samples=nsamps) 272 273 # Calculate the quantities we want as we go 274 4160 366895 88.2 0.1 for chan in rmod.channels: 275 4096 21363718 5215.8 7.7 chanfreq = chan.get_frequency(d.UNITS.HZ, d.TARGET.NULLER) 276 4096 84324 20.6 0.0 pfb_cor = 1.0 / pfb_gains.predict_amplitude_factor(chanfreq) 277 4096 137108 33.5 0.0 complexData = (pfb_cor * np.array(samples.i[chan.channel - 1]).astype('float64')) + \ 278 4096 123754 30.2 0.0 (pfb_cor * np.array(samples.q[chan.channel - 1]).astype('float64') * 1.j) 279 4096 84030 20.5 0.0 complexCalData = (pfb_cor * np.array(cal_samples.i[chan.channel - 1]).astype('float64')) + \ 280 4096 100469 24.5 0.0 (pfb_cor * np.array(cal_samples.q[chan.channel - 1]).astype('float64') * 1.j) 281 4096 6633 1.6 0.0 if amp == 0.: 282 complexCalData = complexData * 0. + 1. 283 284 4096 22743 5.6 0.0 fullData = complexData / complexCalData 285 4096 139277 34.0 0.1 meanData = np.mean(fullData) 286 4096 7907 1.9 0.0 mag = abs(meanData) 287 4096 29091 7.1 0.0 phase = np.arctan2(meanData.imag, meanData.real) * 57.2957795 # in degrees 288 4096 12232 3.0 0.0 phase %= 360 # ensure 0 <= phase < 360 289 290 4096 6775 1.7 0.0 maglist.append(mag) 291 4096 5494 1.3 0.0 phaselist.append(phase) 292 4096 236471 57.7 0.1 noiselist.append(np.std(abs(fullData)) / np.sqrt(nsamps)) 293 4096 76083 18.6 0.0 raw = np.mean(complexData) 294 4096 61301 15.0 0.0 cal = np.mean(complexCalData) 295 4096 6409 1.6 0.0 amps_raw.append(raw) 296 4096 5776 1.4 0.0 amps_cal.append(cal) 297 # amps_raw.append(np.mean(complexData)) 298 # amps_cal.append(np.mean(complexCalData)) 299 64 293283 4582.5 0.1 overloadcountlist.append(rmod.get_overload_count(d.TARGET.DEMOD)) 300 64 335 5.2 0.0 vdemodlist.append(rmod.get_fast_samples(units=d.UNITS.NORMALIZED, 301 64 93 1.5 0.0 average=True, 302 64 5987434 93553.7 2.2 target=d.TARGET.DEMOD)) 303 304 8 1128 141.0 0.0 logger.debug('Finishing, Restoring re-routing, re-zeroing channels') 305 306 8 47077 5884.6 0.0 rmod.set_dmfd_routing(target=d.ROUTING.ADC, target_module=rmod.module, target_mezzanine=rmod.mezzanine.mezzanine) 307 8 2790021 348752.6 1.0 rmod.channels.set_amplitude(0, d.UNITS.NORMALIZED, target) 308 8 2797171 349646.4 1.0 rmod.channels.set_frequency(0, d.UNITS.HZ, target) 309 8 2678879 334859.9 1.0 rmod.channels.set_frequency(0, d.UNITS.HZ, d.TARGET.DEMOD) 310 311 8 30 3.8 0.0 data = { 312 8 20 2.5 0.0 'description': 'Network Analysis using the {0}'.format(target), 313 8 13 1.6 0.0 'freq': freqlist, 314 8 12 1.5 0.0 'amp': maglist, 315 8 14 1.8 0.0 'phase': phaselist, 316 8 9 1.1 0.0 'noise': noiselist, 317 8 10 1.2 0.0 'vdemod': vdemodlist, 318 8 9 1.1 0.0 'overloadcount': overloadcountlist, 319 8 11 1.4 0.0 'amp_raw': amps_raw, 320 8 10 1.2 0.0 'amp_cal': amps_cal} 321 322 8 11 1.4 0.0 return data This output immediately tells us that ``get_samples()`` calls are responsible for 63% of the script's runtime. The remaining calls are possibly worth optimizing, but `Amdahl's Law `_ tells us we'll never be able to halve the runtime unless we tackle ``get_samples()``.