qubic.run

AbstractCircuitRunner implementation which runs locally on the ZCU216. Used by soc_rpc_server to load and run circuits. Can also be used locally, if user environment is on the ZCU216.

`CircuitRunner`

Bases: AbstractCircuitRunner

Class for taking a program in binary/ASM form and running it on the FPGA. Currently, this class is meant to be run on the QubiC FPGA PS + pynq system. It will load and configure the specified PL bitfile, and can then be used to configure PL memory and registers, and read back data from experiments.

Attributes:

Name	Type	Description
`_pl_driver`	`PLInterface`	used for low level access to memory and registers
`loaded_channels`	`list`	channels with a program currently loaded

Source code in qubic/run.py

class CircuitRunner(AbstractCircuitRunner):
    """
    Class for taking a program in binary/ASM form and running it on 
    the FPGA. Currently, this class is meant to be run on the QubiC FPGA 
    PS + pynq system. It will load and configure the specified PL bitfile,
    and can then be used to configure PL memory and registers, and read 
    back data from experiments.

    Attributes
    ----------
    _pl_driver: pl.PLInterface 
        used for low level access to memory and registers
    loaded_channels: list 
        channels with a program currently loaded
    """

    def __init__(self, platform: str = 'rfsoc', commit: str = '81f773e5', load_xsa: bool = True):
        """
        Create the PL driver, load the overlay and write the initial
        register configuration.

        Parameters
        ----------
        platform: str
            hardware platform; 'rfsoc' is the only implemented value
        commit: str
            identifier handed to `pl.PLInterface`; logged as the bitfile being loaded
        load_xsa: bool
            forwarded as the `download` argument of `load_overlay`

        Raises
        ------
        NotImplementedError
            if `platform` is anything other than 'rfsoc'
        """
        if platform != 'rfsoc':
            raise NotImplementedError('rfsoc is the only implemented platform!')

        logger = logging.getLogger(__name__)

        self._pl_driver = pl.PLInterface(commit)
        self._pl_driver.refclks(lmk_freq=500.18)
        logger.info(f'loading bitfile: {commit}')
        self._pl_driver.load_overlay(download=load_xsa)
        logger.info(f'mts: {self._pl_driver.mts()}')
        # NOTE(review): presumably selects the DAC/ADC Nyquist zones -- confirm
        # against the PLInterface documentation
        self._pl_driver.dacnyquist(2)
        self._pl_driver.adcnyquist(1)
        # initial register state; the values are hardware specific
        self._pl_driver.write_reg('dspreset', 0)
        self._pl_driver.write_reg("mixbb1sel", 0)
        self._pl_driver.write_reg("mixbb2sel", 0)
        self._pl_driver.write_reg("shift", 12)

        self.loaded_channels = []

    def load_and_run(self, rawasm: dict, n_total_shots: int, reads_per_shot: int | Dict[str, int] = 1):
        """
        Load circuit described by rawasm "binary", then run for n_total_shots. 

        Parameters
        ----------
        rawasm: dict
            program to load. See load_circuit for details.
        n_total_shots: int
            number of shots to run. Program is restarted from the beginning 
            for each new shot
        reads_per_shot: int | dict
            number of values per shot per channel to read back from accbuf. If dict, indexed
            by str(channel_number) (same indices as raw_asm_list). If int, assumed to be 
            the same across channels. Unless multiple circuits were rastered pre-compilation or 
            there is mid-circuit measurement involved this is typically 1

        Returns
        -------
        dict:
            Complex IQ shots for each accbuf in chanlist; each array has 
            shape `(n_total_shots, reads_per_shot)`
        """
        self.load_circuit(rawasm)
        return self.run_circuit(n_total_shots, reads_per_shot)

    def load_circuit(self, rawasm: dict, zero: bool = True, load_commands: bool = True, 
                     load_freqs: bool = True, load_envs: bool = True):
        """
        Load circuit described by rawasm "binary", which is the output of 
        the final distributed proc assembler stage. Loads command memory, env memory
        and freq buffer memory, according to specified input parameters. Before circuit is loaded, 
        if zero=True, all channels are zeroed out using zero_command_buf()

        Note that zero_command_buf() also empties `loaded_channels`, and only
        load_command_buf() repopulates it; with zero=True and load_commands=False
        no channel is left marked as loaded.

        Parameters
        ----------
        rawasm: dict
            keys are channels to load. For each channel, there should be:

                - 'cmd_buf' : byte array containing compiled program binary
                - 'env_buffers' : dict of env buffers for that channel:
                    0 : qdrv buffer
                    1 : rdrv buffer
                    2 : rdlo buffer
                - 'freq_buffers' : dict of freq buffers for that channel:
                    0 : qdrv buffer
                    1 : rdrv buffer
                    2 : rdlo buffer
        zero: bool
            if True, (default), zero out all cmd buffers before loading circuit
        load_commands: bool
            if True, (default), load command buffers
        load_freqs: bool
            if True, (default), load freq buffers
        load_envs: bool
            if True, (default), load env buffers
        """
        if zero:
            self.zero_command_buf()

        for chan_key, chan_asm in rawasm.items():
            if load_commands:
                self.load_command_buf(chan_key, chan_asm['cmd_buf'])
            # buffers are looked up by the position of the channel type in ELEM_CHAN_TYPES
            for i, chan_type in enumerate(ELEM_CHAN_TYPES): #todo: put these somewhere as parameters
                if load_envs:
                    self.load_env_buf(chan_type, chan_key, chan_asm['env_buffers'][i])
                if load_freqs:
                    self.load_freq_buf(chan_type, chan_key, chan_asm['freq_buffers'][i])

    def load_command_buf(self, core_key: str, cmd_buf: bytes | Binary):
        """
        Load cmd_buf into the command buffer of core core_key, and record
        core_key in `loaded_channels` if it is not already there.

        Parameters
        ----------
        core_key: str
            str index of core mem to load
        cmd_buf: bytes or Binary
            program binary; a Binary wrapper is unwrapped via its `.data` attribute
        """
        if isinstance(cmd_buf, Binary):
            cmd_buf = cmd_buf.data
        self._pl_driver.write_cmd_buf(core_key, cmd_buf)
        if core_key not in self.loaded_channels:
            self.loaded_channels.append(core_key)

    def zero_command_buf(self, core_keys: List[str | int] | None = None):
        """
        Loads command memory with dummy asm program: reset phase, 
        output done signal, then idle. This is useful/necessary if 
        a new program is loaded on a subset of cores such that the 
        previous program is not completely overwritten (e.g. you 
        are loading a program that runs only on core 2, and the 
        previous program used cores 2 and 3).

        The zeroed cores are removed from `loaded_channels`.

        Parameters
        ----------
        core_keys: list
            list of channels (proc cores) to load. Defaults to
            all channels in currently loaded gateware.
        """
        if core_keys is None:
            core_keys = [str(i) for i in range(self._pl_driver.nproc)]

        # assemble the dummy program once and reuse it for every core
        rdrvelemcfg = hw.RFSoCElementCfg(16, 16)
        asm0 = am.SingleCoreAssembler([rdrvelemcfg, rdrvelemcfg, rdrvelemcfg])
        asm0.add_phase_reset()
        asm0.add_done_stb()
        cmd0, _, _ = asm0.get_compiled_program()

        for key in core_keys:
            self.load_command_buf(key, cmd0)

        # load_command_buf marks each core as loaded; undo that, since the
        # dummy program is not a real circuit
        for key in core_keys:
            if key in self.loaded_channels:
                self.loaded_channels.remove(key)

    def load_env_buf(self, chan_type: str, core_key: str, env_buf: bytes | Binary):
        """
        Load envelope buffer into specified chan_type (qdrv, rdrv, rdlo) 
        and core_key

        Parameters
        ----------
        chan_type: str
            'qdrv', 'rdrv', or 'rdlo'
        core_key: str
            str index of core mem to load
        env_buf: bytes or Binary
            buffer contents; a Binary wrapper is unwrapped via its `.data` attribute
        """
        if isinstance(env_buf, Binary):
            env_buf = env_buf.data
        self._pl_driver.write_env_buf(chan_type, core_key, env_buf)

    def load_freq_buf(self, chan_type: str, core_key: str, freq_buf: bytes | Binary):
        """
        Load frequency buffer into specified chan_type (qdrv, rdrv, rdlo) 
        and core_key

        Parameters
        ----------
        chan_type: str
            'qdrv', 'rdrv', or 'rdlo'
        core_key: str
            str index of core mem to load
        freq_buf: bytes or Binary
            buffer contents; a Binary wrapper is unwrapped via its `.data` attribute
        """
        if isinstance(freq_buf, Binary):
            freq_buf = freq_buf.data
        self._pl_driver.write_freq_buf(chan_type, core_key, freq_buf)

    def run_circuit_batch(self, 
                          raw_asm_list: List[Dict], 
                          n_total_shots: int, 
                          reads_per_shot: int | Dict[str, int] = 1, 
                          timeout_per_shot: float = 8,
                          reload_cmd: bool = True, 
                          reload_freq: bool = True, 
                          reload_env: bool = True, 
                          zero_between_reload: bool = True,
                          from_server: bool = False):
        """
        Runs a batch of circuits given by a list of raw_asm "binaries". Each circuit is run n_total_shots
        times. `reads_per_shot` and `n_total_shots` are passed directly into `run_circuit`, and must
        be the same for all circuits in the batch. The parameters `reload_cmd`, `reload_freq`, `reload_env`, and 
        `zero_between_reload` control which of these fields is rewritten circuit-to-circuit (everything is 
        rewritten initially). Leave these all at `True` (default) for maximum safety, to ensure that QubiC 
        is in a clean state before each run. Depending on the circuits, some of these can be turned off 
        to save time.

        TODO: consider throwing some version of all the args here into a BatchedCircuitRun or somesuch
        object

        Parameters
        ----------
        raw_asm_list: list
            list of raw_asm binaries to run
        n_total_shots: int
            number of shots per circuit
        reads_per_shot: int | dict
            number of values per shot per channel to read back from accbuf. If dict, indexed
            by str(channel_number) (same indices as raw_asm_list). If int, assumed to be 
            the same across channels. Unless multiple circuits were rastered pre-compilation or 
            there is mid-circuit measurement involved this is typically 1
        timeout_per_shot: float
            job will time out if time to take a single shot exceeds this value in seconds 
            (this likely means the job is hanging due to timing issues in the program or gateware)
        reload_cmd: bool
            if True, reload command buffer between circuits
        reload_freq: bool
            if True, reload freq buffer between circuits
        reload_env: bool
            if True, reload env buffer between circuits
        zero_between_reload: bool
            if True, zero out all command buffers (see zero_command_buf) before
            each circuit after the first one is loaded
        from_server: bool
            set to true if calling over RPC. If True, pack returned s11 arrays into
            byte objects

        Returns
        -------
        dict:
            Complex IQ shots for each accbuf in chanlist; each array has 
            shape `(len(raw_asm_list), n_total_shots, reads_per_shot)`. Entries for a
            channel that a given circuit did not return data for are left as NaN.
        """
        logger = logging.getLogger(__name__)

        # union of all proc channels used anywhere in the batch
        channels = set().union(*(set(prog.keys()) for prog in raw_asm_list))
        if isinstance(reads_per_shot, int):
            reads_per_shot = {chan: reads_per_shot for chan in channels}

        # NaN-filled so that slots never written by a circuit are recognisable
        s11 = {ch: np.nan*np.zeros((len(raw_asm_list), n_total_shots, reads_per_shot[ch]), dtype=np.complex128)
               for ch in channels}

        for i, raw_asm in enumerate(tqdm(raw_asm_list)):
            logger.info(f'starting circuit {i}/{len(raw_asm_list)-1}')
            if i == 0:
                # the first circuit always gets a full, clean load
                self.load_circuit(raw_asm, True, True, True, True)
            else:
                self.load_circuit(raw_asm, zero=zero_between_reload, load_commands=reload_cmd,
                                  load_freqs=reload_freq, load_envs=reload_env)

            s11_i = self.run_circuit(n_total_shots, reads_per_shot, timeout_per_shot)

            for ch, shots in s11_i.items():
                s11[ch][i] = shots

        if from_server:
            for ch in s11.keys():
                s11[ch] = s11[ch].tobytes()
        logger.info('batch finished')
        return s11

    def load_and_run_acq(self, 
                         raw_asm_prog: Dict, 
                         n_total_shots: int = 1, 
                         nsamples: int = 8192, 
                         acq_chans: Dict[str, int] | None = None, 
                         trig_delay: float = 0, 
                         decimator: int = 0, 
                         return_acc: bool = False, 
                         from_server: bool = False):
        """
        Load the program given by `raw_asm_prog` and acquire raw (or downconverted) adc traces.

        Parameters
        ----------
        raw_asm_prog: dict
            ASM binary to run. See load_circuit for details.
        n_total_shots: int
            number of shots to run. Program is restarted from the beginning 
            for each new shot
        nsamples: int
            number of samples to read from the acq buffer
        acq_chans: dict
            defaults to `{'0': 0, '1': 1}` when omitted or None. 
            Current channel mapping is:

                '0': ADC_237_2 (main readout ADC)
                '1': ADC_237_0 (other ADC connected in gateware)
                TODO: figure out DLO channels, etc and what they mean
        trig_delay: float
            time to delay acquisition, relative to circuit start.
            NOTE: this value, when converted to units of clock cycles, is a 
            16-bit value. So, it maxes out at CLK_PERIOD*(2**16) = 131.072e-6
        decimator: int
            decimation interval when sampling. e.g. 0 means full sample rate, 1
            means capture every other sample, 2 means capture every third sample, etc
        return_acc: bool
            if True, return a single acc (integrated + accumulated readout) value per shot,
            on each loaded channel. Default is False.
        from_server: bool
            set to true if calling over RPC. If True, pack returned acq arrays into
            byte objects

        Returns
        -------
        tuple | Dict
            - if `return_acc` is `False`:

                - dict:
                    array of acq samples for each channel in acq_chans with shape (n_total_shots, nsamples)

            - if `return_acc` is `True`:

                - tuple:
                    - dict:
                        array of acq samples for each channel in acq_chans with shape `(n_total_shots, nsamples)`
                    - dict:
                        array of acc values for each loaded channel with length `n_total_shots`

        """
        if acq_chans is None:
            # fresh dict per call, so the default can never be shared or mutated across calls
            acq_chans = {'0': 0, '1': 1}
        self.load_circuit(raw_asm_prog)
        return self.run_circuit_acq(n_total_shots, nsamples, acq_chans, trig_delay, decimator, return_acc, from_server)

    def run_circuit(self, 
                    n_total_shots: int, 
                    reads_per_shot: int | Dict[str, int] = 1, 
                    timeout_per_shot: float = 8, 
                    from_server: bool = False):
        """
        Run the currently loaded program and acquire integrated IQ shots. Program is
        run `n_total_shots` times, in batches of size `shots_per_run` (i.e. `shots_per_run` runs of the program
        are executed in logic before each readback/restart cycle). The current gateware 
        is limited to ~1000 reads in its IQ buffer, which generally means 
        shots_per_run = 1000//reads_per_shot

        Parameters
        ----------
        n_total_shots: int
            number of shots to run. Program is restarted from the beginning 
            for each new shot
        reads_per_shot: int | dict
            number of values per shot per channel to read back from accbuf. If `dict`, indexed
            by str(channel_number) (same indices as `raw_asm_list`). If `int`, assumed to be 
            the same across channels. Unless multiple circuits were rastered pre-compilation or 
            there is mid-circuit measurement involved this is typically 1
        timeout_per_shot: float
            job will time out if time to take a single shot exceeds this value in seconds 
            (this likely means the job is hanging due to timing issues in the program or gateware)
        from_server: bool
            set to true if calling over RPC. If `True`, pack returned s11 arrays into
            byte objects

        Returns
        -------
        dict:
            Complex IQ shots for each accbuf in `chanlist`; each array has 
            shape `(n_total_shots, reads_per_shot)`

        Raises
        ------
        ValueError
            if there are no channels to read back (e.g. nothing is loaded), if 
            `n_total_shots` is less than 1, or if the largest `reads_per_shot` value 
            is outside the range 1..ACC_BUF_SIZE
        """
        logger = logging.getLogger(__name__)

        if isinstance(reads_per_shot, int):
            reads_per_shot = {chan: reads_per_shot for chan in self.loaded_channels}

        if not reads_per_shot:
            raise ValueError('no channels to read back; load a circuit before calling run_circuit')
        if n_total_shots < 1:
            raise ValueError(f'n_total_shots must be at least 1, got {n_total_shots}')

        max_reads_per_shot = max(reads_per_shot.values())
        if not 1 <= max_reads_per_shot <= ACC_BUF_SIZE:
            # outside this range shots_per_run would be 0 (or the division undefined)
            raise ValueError(f'reads_per_shot must be between 1 and {ACC_BUF_SIZE}, got {max_reads_per_shot}')

        logger.info(f'starting circuit with {n_total_shots} shots')

        # as many shots per hardware run as fit in the acc buffer
        shots_per_run = min(ACC_BUF_SIZE//max_reads_per_shot, n_total_shots)
        n_runs = -(-n_total_shots//shots_per_run)  # integer ceiling division
        s11 = {ch: np.zeros((shots_per_run*n_runs, reads_per_shot[ch]), dtype=np.complex128) 
               for ch in self.loaded_channels}

        for i in range(n_runs):
            result = self._pl_driver.run_prog_acc(self.loaded_channels, shots_per_run, 
                                                  reads_per_shot=reads_per_shot, 
                                                  timeout_per_shot=timeout_per_shot)
            start, stop = i*shots_per_run, (i + 1)*shots_per_run
            for ch in self.loaded_channels:
                s11[ch][start:stop, :] = result[ch].reshape((shots_per_run, reads_per_shot[ch]))

        # every run takes a full shots_per_run shots, so the last run may
        # overshoot; drop the extraneous data
        if shots_per_run*n_runs > n_total_shots:
            for ch in self.loaded_channels:
                s11[ch] = s11[ch][:n_total_shots]

        if from_server:
            for ch in self.loaded_channels:
                s11[ch] = s11[ch].tobytes()

        logger.info('done circuit')
        return s11

    def run_circuit_acq(self,
                        n_total_shots: int = 1, 
                        nsamples: int = 8192, 
                        acq_chans: Dict[str, int] | None = None, 
                        trig_delay: float = 0, 
                        decimator: int = 0, 
                        return_acc: bool = False, 
                        from_server: bool = False):
        """
        Run the currently loaded program and acquire raw (or downconverted) adc traces.

        Parameters
        ----------
        n_total_shots: int
            number of shots to run. Program is restarted from the beginning 
            for each new shot
        nsamples: int
            number of samples to read from the acq buffer
        acq_chans: dict
            defaults to `{'0': 0, '1': 1}` when omitted or None. 
            Current channel mapping is:

                '0': ADC_237_2 (main readout ADC)
                '1': ADC_237_0 (other ADC connected in gateware)
                TODO: figure out DLO channels, etc and what they mean
        trig_delay: float
            time to delay acquisition, relative to circuit start.
            NOTE: this value, when converted to units of clock cycles, is a 
            16-bit value. So, it maxes out at CLK_PERIOD*(2**16) = 131.072e-6
        decimator: int
            decimation interval when sampling. e.g. 0 means full sample rate, 1
            means capture every other sample, 2 means capture every third sample, etc
        return_acc: bool
            if True, return a single acc (integrated + accumulated readout) value per shot,
            on each loaded channel. Default is False.
        from_server: bool
            set to true if calling over RPC. If True, pack returned acq arrays into
            byte objects

        Returns
        -------
        tuple | Dict
            - if return_acc is False:

                - dict:
                    array of acq samples for each channel in `acq_chans` with shape `(n_total_shots, nsamples)`

            - if return_acc is True:

                - tuple:
                    - dict:
                        array of acq samples for each channel in acq_chans with shape `(n_total_shots, nsamples)`
                    - dict:
                        array of acc values for each loaded channel with length `n_total_shots`

        Raises
        ------
        RuntimeError
            if `nsamples` exceeds MAX_NSAMPLES
        """
        if nsamples > MAX_NSAMPLES:
            raise RuntimeError(f'{nsamples} exceeds max_nsamples length of {MAX_NSAMPLES}')

        if acq_chans is None:
            # fresh dict per call, so the default can never be shared or mutated across calls
            acq_chans = {'0': 0, '1': 1}

        # acc values are only requested when the caller wants them back
        acc_chans = self.loaded_channels if return_acc else []
        acq_data, acc_data = self._pl_driver.run_prog_acq(n_total_shots, nsamples, acq_chans, acc_chans,
                                                          int(trig_delay/CLK_PERIOD), decimator)

        if from_server:
            for ch in acq_data.keys():
                acq_data[ch] = acq_data[ch].tobytes()
            for ch in acc_data.keys():
                acc_data[ch] = acc_data[ch].tobytes()

        if return_acc:
            return acq_data, acc_data
        return acq_data

`load_and_run(rawasm, n_total_shots, reads_per_shot=1)`

Load circuit described by rawasm "binary", then run for n_total_shots.

Parameters:

Name	Type	Description	Default
`rawasm`	`dict`		required
`n_total_shots`	`int`	number of shots to run. Program is restarted from the beginning for each new shot	required
`reads_per_shot`	`int \| dict`	number of values per shot per channel to read back from accbuf. If dict, indexed by str(channel_number) (same indices as raw_asm_list). If int, assumed to be the same across channels. Unless multiple circuits were rastered pre-compilation or there is mid-circuit measurement involved this is typically 1	`1`

Returns:

Name	Type	Description
`dict`		Complex IQ shots for each accbuf in chanlist; each array has shape `(n_total_shots, reads_per_shot)`

Source code in qubic/run.py

def load_and_run(self, rawasm: dict, n_total_shots: int, reads_per_shot: int = 1): 
    """
    Convenience wrapper: load the circuit in `rawasm` with the default
    `load_circuit` options, then execute it `n_total_shots` times.

    Parameters
    ----------
    rawasm: dict
        program "binary", handed unchanged to `load_circuit`
    n_total_shots: int
        how many shots to take; the program restarts from the beginning 
        on every shot
    reads_per_shot: int | dict
        values to read back from accbuf per shot and channel. A dict is 
        indexed by str(channel_number) (same indices as raw_asm_list); an int 
        applies to all channels. Typically 1, unless several circuits were 
        rastered pre-compilation or mid-circuit measurement is involved

    Returns
    -------
    dict:
        whatever `run_circuit` returns: complex IQ shots for each accbuf in 
        chanlist, each array of shape `(n_total_shots, reads_per_shot)`
    """
    # load first, so that the run below sees the new program
    self.load_circuit(rawasm)
    iq_shots = self.run_circuit(n_total_shots, reads_per_shot)
    return iq_shots

`load_and_run_acq(raw_asm_prog, n_total_shots=1, nsamples=8192, acq_chans={'0': 0, '1': 1}, trig_delay=0, decimator=0, return_acc=False, from_server=False)`

Load the program given by raw_asm_prog and acquire raw (or downconverted) adc traces.

Parameters:

Name	Type	Description	Default
`raw_asm_prog`	`Dict`	ASM binary to run. See load_circuit for details.	required
`n_total_shots`	`int`	number of shots to run. Program is restarted from the beginning for each new shot	`1`
`nsamples`	`int`	number of samples to read from the acq buffer	`8192`
`acq_chans`	`Dict[str, int]`	current channel mapping is: `'0': ADC_237_2 (main readout ADC) '1': ADC_237_0 (other ADC connected in gateware) TODO: figure out DLO channels, etc and what they mean`	`{'0': 0, '1': 1}`
`trig_delay`	`float`	time to delay acquisition, relative to circuit start. NOTE: this value, when converted to units of clock cycles, is a 16-bit value. So, it maxes out at `CLK_PERIOD*(2**16)` = 131.072e-6	`0`
`decimator`	`int`	decimation interval when sampling. e.g. 0 means full sample rate, 1 means capture every other sample, 2 means capture every third sample, etc	`0`
`return_acc`	`bool`	if True, return a single acc (integrated + accumulated readout) value per shot, on each loaded channel. Default is False.	`False`
`from_server`	`bool`	set to true if calling over RPC. If True, pack returned acq arrays into byte objects	`False`

Returns:

Type	Description
`tuple \| Dict`	If `return_acc` is `False`: a dict holding, for each channel in `acq_chans`, an array of acq samples with shape `(n_total_shots, nsamples)`. If `return_acc` is `True`: a tuple of (1) that same dict of acq samples and (2) a dict holding, for each loaded channel, an array of acc values with length `n_total_shots`.

Source code in qubic/run.py

def load_and_run_acq(self, 
                     raw_asm_prog: Dict, 
                     n_total_shots: int = 1, 
                     nsamples: int = 8192, 
                     acq_chans: Dict[str, int] = {'0':0,'1':1}, 
                     trig_delay: float = 0, 
                     decimator: int = 0, 
                     return_acc: bool = False, 
                     from_server: bool = False):
    """
    Load the program given by `raw_asm_prog` and acquire raw (or downconverted) adc traces.

    Parameters
    ----------
    raw_asm_prog: dict
        ASM binary to run. See load_circuit for details.
    n_total_shots: int
        number of shots to run. Program is restarted from the beginning 
        for each new shot
    nsamples: int
        number of samples to read from the acq buffer
    acq_chans: dict
        current channel mapping is:

            '0': ADC_237_2 (main readout ADC)
            '1': ADC_237_0 (other ADC connected in gateware)
            TODO: figure out DLO channels, etc and what they mean
    trig_delay: float
        time to delay acquisition, relative to circuit start.
        NOTE: this value, when converted to units of clock cycles, is a 
        16-bit value. So, it maxes out at CLK_PERIOD*(2**16) = 131.072e-6
    decimator: int
        decimation interval when sampling. e.g. 0 means full sample rate, 1
        means capture every other sample, 2 means capture every third sample, etc
    return_acc: bool
        if True, return a single acc (integrated + accumulated readout) value per shot,
        on each loaded channel. Default is False.
    from_server: bool
        set to true if calling over RPC. If True, pack returned acq arrays into
        byte objects

    Returns
    -------
    tuple | Dict
        - if `return_acc` is `False`:

            - dict:
                array of acq samples for each channel in acq_chans with shape (n_total_shots, nsamples)

        - if `return_acc` is `True`:

            - tuple:
                - dict:
                    array of acq samples for each channel in acq_chans with shape `(n_total_shots, nsamples)`
                - dict:
                    array of acc values for each loaded channel with length `n_total_shots`

    """
    self.load_circuit(raw_asm_prog)
    return self.run_circuit_acq(n_total_shots, nsamples, acq_chans, trig_delay, decimator, return_acc, from_server)

`load_circuit(rawasm, zero=True, load_commands=True, load_freqs=True, load_envs=True)`

Load circuit described by rawasm "binary", which is the output of the final distributed proc assembler stage. Loads command memory, env memory and freq buffer memory, according to specified input parameters. Before circuit is loaded, if zero=True, all channels are zeroed out using zero_command_buf()

Parameters:

Name	Type	Description	Default
`rawasm`	`dict`	keys are channels to load. For each channel, there should be: `- 'cmd_buf' : byte array containing compiled program binary - 'env_buffers' : dict of env buffers for that channel: 0 : qdrv buffer 1 : rdrv buffer 2 : rdlo buffer - 'freq_buffers' : dict of freq buffers for that channel: 0 : qdrv buffer 1 : rdrv buffer 2 : rdlo buffer`	required
`zero`	`bool`	if True, (default), zero out all cmd buffers before loading circuit	`True`
`load_commands`	`bool`	if True, (default), load command buffers	`True`
`load_freqs`	`bool`	if True, (default), load freq buffers	`True`
`load_envs`	`bool`	if True, (default), load env buffers	`True`

Source code in qubic/run.py

def load_circuit(self, rawasm: dict, zero: bool = True, load_commands: bool = True, 
                 load_freqs: bool = True, load_envs: bool = True):
    """
    Write a compiled circuit (the rawasm "binary" produced by the final 
    distributed proc assembler stage) into the hardware. Depending on the 
    flags, command memory, envelope memory and frequency buffer memory are 
    written for every channel in `rawasm`. With zero=True, zero_command_buf() 
    is called first so that all channels start out zeroed.

    Parameters
    ----------
    rawasm: dict
        keys are channels to load. For each channel, there should be:

            - 'cmd_buf' : byte array containing compiled program binary
            - 'env_buffers' : dict of env buffers for that channel:
                0 : qdrv buffer
                1 : rdrv buffer
                2 : rdlo buffer
            - 'freq_buffers' : dict of freq buffers for that channel:
                0 : qdrv buffer
                1 : rdrv buffer
                2 : rdlo buffer
    zero: bool
        zero out all cmd buffers before loading the circuit (default True)
    load_commands: bool
        write the command buffers (default True)
    load_freqs: bool
        write the freq buffers (default True)
    load_envs: bool
        write the env buffers (default True)
    """
    if zero:
        self.zero_command_buf()

    for core_key, program in rawasm.items():
        if load_commands:
            self.load_command_buf(core_key, program['cmd_buf'])

        # each element channel type owns the buffer stored under its position
        # in ELEM_CHAN_TYPES
        for buf_index, elem_type in enumerate(ELEM_CHAN_TYPES): #todo: put these somewhere as parameters
            if load_envs:
                env_buf = program['env_buffers'][buf_index]
                self.load_env_buf(elem_type, core_key, env_buf)
            if load_freqs:
                freq_buf = program['freq_buffers'][buf_index]
                self.load_freq_buf(elem_type, core_key, freq_buf)

`load_command_buf(core_key, cmd_buf)`

Load cmd_buf into the command buffer of core core_key.

Parameters:

Name	Type	Description	Default
`core_key`	`str`	str index of core mem to load	required
`cmd_buf`	`bytes \| Binary`		required

Source code in qubic/run.py

def load_command_buf(self, core_key: str, cmd_buf: bytes | Binary):
    """
    Load cmd_buf into the command buffer of core core_key.

    Parameters
    ----------
    core_key: str
        str index of core mem to load
    cmd_buf: bytes or Binary
    """
    if isinstance(cmd_buf, Binary):
        cmd_buf = cmd_buf.data
    self._pl_driver.write_cmd_buf(core_key, cmd_buf)
    if core_key not in self.loaded_channels:
        self.loaded_channels.append(core_key)

`load_env_buf(chan_type, core_key, env_buf)`

Load envelope buffer into specified chan_type (qdrv, rdrv, rdlo) and core_key

Parameters:

Name	Type	Description	Default
`chan_type`	`str`	'qdrv', 'rdrv', or 'rdlo'	required
`core_key`	`str`	str index of core mem to load	required
`env_buf`	`bytes \| Binary`		required

Source code in qubic/run.py

def load_env_buf(self, chan_type: str, core_key: str, env_buf: bytes | Binary):
    """
    Load envelope buffer into specified chan_type (qdrv, rdrv, rdlo) 
    and core_key

    Parameters
    ----------
    chan_type: str
        'qdrv', 'rdrv', or 'rdlo'
    core_key: str
        str index of core mem to load
    env_buf: bytes or Binary
    """
    if isinstance(env_buf, Binary):
        env_buf = env_buf.data
    self._pl_driver.write_env_buf(chan_type, core_key, env_buf)

`load_freq_buf(chan_type, core_key, freq_buf)`

Load frequency buffer into specified chan_type (qdrv, rdrv, rdlo) and core_key

Parameters:

Name	Type	Description	Default
`chan_type`	`str`	'qdrv', 'rdrv', or 'rdlo'	required
`core_key`	`str`	str index of core mem to load	required
`freq_buf`	`bytes \| Binary`		required

Source code in qubic/run.py

def load_freq_buf(self, chan_type: str, core_key: str, freq_buf: bytes | Binary):
    """
    Load frequency buffer into specified chan_type (qdrv, rdrv, rdlo) 
    and core_key

    Parameters
    ----------
    chan_type: str
        'qdrv', 'rdrv', or 'rdlo'
    core_key: str
        str index of core mem to load
    freq_buf: bytes or Binary
    """
    if isinstance(freq_buf, Binary):
        freq_buf = freq_buf.data
    self._pl_driver.write_freq_buf(chan_type, core_key, freq_buf)

`run_circuit(n_total_shots, reads_per_shot=1, timeout_per_shot=8, from_server=False)`

Run the currently loaded program and acquire integrated IQ shots. Program is run n_total_shots times, in batches of size shots_per_run (i.e. shots_per_run runs of the program are executed in logic before each readback/restart cycle). The current gateware is limited to ~1000 reads in its IQ buffer, which generally means shots_per_run = 1000//reads_per_shot

Parameters:

Name	Type	Description	Default
`n_total_shots`	`int`	number of shots to run. Program is restarted from the beginning for each new shot	required
`reads_per_shot`	`int`	number of values per shot per channel to read back from accbuf. If `dict`, indexed by str(channel_number) (same indices as `raw_asm_list`). If `int`, assumed to be the same across channels. Unless multiple circuits were rastered pre-compilation or there is mid-circuit measurement involved this is typically 1	`1`
`timeout_per_shot`	`float`	job will time out if time to take a single shot exceeds this value in seconds (this likely means the job is hanging due to timing issues in the program or gateware)	`8`
`from_server`	`bool`	set to true if calling over RPC. If `True`, pack returned s11 arrays into byte objects	`False`

Returns:

Name	Type	Description
`dict`		Complex IQ shots for each accbuf in `chanlist`; each array has shape `(n_total_shots, reads_per_shot)`

Source code in qubic/run.py

def run_circuit(self, 
                n_total_shots: int, 
                reads_per_shot: int = 1, 
                timeout_per_shot: float = 8, 
                from_server: bool = False):
    """
    Run the currently loaded program and acquire integrated IQ shots. Program is
    run `n_total_shots` times, in batches of size `shots_per_run` (i.e. `shots_per_run` runs of the program
    are executed in logic before each readback/restart cycle). The current gateware 
    is limited to ~1000 reads in its IQ buffer, which generally means 
    shots_per_run = 1000//reads_per_shot

    Parameters
    ----------
    n_total_shots: int
        number of shots to run. Program is restarted from the beginning 
        for each new shot
    reads_per_shot: int | dict
        number of values per shot per channel to read back from accbuf. If `dict`, indexed
        by str(channel_number) (same indices as `raw_asm_list`). If `int`, assumed to be 
        the same across channels. Unless multiple circuits were rastered pre-compilation or 
        there is mid-circuit measurement involved this is typically 1
    timeout_per_shot: float
        job will time out if time to take a single shot exceeds this value in seconds 
        (this likely means the job is hanging due to timing issues in the program or gateware)
    from_server: bool
        set to true if calling over RPC. If `True`, pack returned s11 arrays into
        byte objects

    Returns
    -------
    dict:
        Complex IQ shots for each accbuf in `chanlist`; each array has 
        shape `(n_total_shots, reads_per_shot)`
    """
    if isinstance(reads_per_shot, int):
        reads_per_shot = {chan: reads_per_shot for chan in self.loaded_channels}

    logging.getLogger(__name__).info(f'starting circuit with {n_total_shots} shots')

    max_reads_per_shot = max(list(reads_per_shot.values()))

    shots_per_run = min(ACC_BUF_SIZE//max_reads_per_shot, n_total_shots)
    n_runs = int(np.ceil(n_total_shots/shots_per_run))
    s11 = {ch: np.zeros((shots_per_run*n_runs, reads_per_shot[ch]), dtype=np.complex128) for ch in self.loaded_channels}

    for i in range(n_runs):

        result = self._pl_driver.run_prog_acc(self.loaded_channels, shots_per_run, reads_per_shot=reads_per_shot, timeout_per_shot=timeout_per_shot)
        for ch in self.loaded_channels:
            s11[ch][i*shots_per_run : (i + 1)*shots_per_run, :] = result[ch].reshape((shots_per_run, reads_per_shot[ch]))

    #remove extraneous data
    if shots_per_run*n_runs > n_total_shots:
        for ch in self.loaded_channels:
            s11[ch] = s11[ch][:n_total_shots]

    if from_server:
        for ch in self.loaded_channels:
            s11[ch] = s11[ch].tobytes()

    logging.getLogger(__name__).info('done circuit')
    return s11

`run_circuit_acq(n_total_shots=1, nsamples=8192, acq_chans={'0': 0, '1': 1}, trig_delay=0, decimator=0, return_acc=False, from_server=False)`

Run the currently loaded program and acquire raw (or downconverted) adc traces.

Parameters:

Name	Type	Description	Default
`n_total_shots`	`int`	number of shots to run. Program is restarted from the beginning for each new shot	`1`
`nsamples`	`int`	number of samples to read from the acq buffer	`8192`
`acq_chans`	`Dict[str, int]`	current channel mapping is: `'0': ADC_237_2 (main readout ADC) '1': ADC_237_0 (other ADC connected in gateware) TODO: figure out DLO channels, etc and what they mean`	`{'0': 0, '1': 1}`
`trig_delay`	`float`	time to delay acquisition, relative to circuit start. NOTE: this value, when converted to units of clock cycles, is a 16-bit value. So, it maxes out at `CLK_PERIOD*(2**16)` = 131.072e-6	`0`
`decimator`	`int`	decimation interval when sampling. e.g. 0 means full sample rate, 1 means capture every other sample, 2 means capture every third sample, etc	`0`
`return_acc`	`bool`	if True, return a single acc (integrated + accumulated readout) value per shot, on each loaded channel. Default is False.	`False`
`from_server`	`bool`	set to true if calling over RPC. If True, pack returned acq arrays into byte objects	`False`

Returns:

Type	Description
`tuple \| Dict`	If return_acc is False: a dict holding an array of acq samples for each channel in `acq_chans`, with shape `(n_total_shots, nsamples)`. If return_acc is True: a tuple of two dicts — first, a dict holding an array of acq samples for each channel in `acq_chans`, with shape `(n_total_shots, nsamples)`; second, a dict holding an array of acc values for each loaded channel, with length `n_total_shots`.

Source code in qubic/run.py

def run_circuit_acq(self,
                    n_total_shots: int = 1, 
                    nsamples: int = 8192, 
                    acq_chans: Dict[str, int] = {'0':0,'1':1}, 
                    trig_delay: float = 0, 
                    decimator: int = 0, 
                    return_acc: bool = False, 
                    from_server: bool = False):
    """
    Run the currently loaded program and acquire raw (or downconverted) adc traces.

    Parameters
    ----------
    n_total_shots: int
        number of shots to run. Program is restarted from the beginning 
        for each new shot
    nsamples: int
        number of samples to read from the acq buffer; must not exceed 
        `MAX_NSAMPLES`
    acq_chans: dict
        current channel mapping is:

            '0': ADC_237_2 (main readout ADC)
            '1': ADC_237_0 (other ADC connected in gateware)
            TODO: figure out DLO channels, etc and what they mean

        A copy of this dict is handed to the PL driver, so neither the 
        caller's dict nor the shared default can be modified downstream.
    trig_delay: float
        time to delay acquisition, relative to circuit start. Converted to 
        clock cycles as `int(trig_delay/CLK_PERIOD)` (truncating).
        NOTE: this value, when converted to units of clock cycles, is a 
        16-bit value. So, it maxes out at CLK_PERIOD*(2**16) = 131.072e-6
    decimator: int
        decimation interval when sampling. e.g. 0 means full sample rate, 1
        means capture every other sample, 2 means capture every third sample, etc
    return_acc: bool
        if True, return a single acc (integrated + accumulated readout) value per shot,
        on each loaded channel. Default is False.
    from_server: bool
        set to true if calling over RPC. If True, pack returned acq (and acc) 
        arrays into byte objects

    Returns
    -------
    tuple | Dict
        - if return_acc is False:

            - dict:
                array of acq samples for each channel in `acq_chans` with shape `(n_total_shots, nsamples)`

        - if return_acc is True:

            - tuple:
                - dict:
                    array of acq samples for each channel in acq_chans with shape `(n_total_shots, nsamples)`
                - dict:
                    array of acc values for each loaded channel with length `n_total_shots`

    Raises
    ------
    RuntimeError
        if `nsamples` exceeds `MAX_NSAMPLES`
    """
    if nsamples > MAX_NSAMPLES:
        raise RuntimeError(f'{nsamples} exceeds max_nsamples length of {MAX_NSAMPLES}')

    # The default for acq_chans is a single dict object shared by every call.
    # Work on a private copy so nothing downstream can alter it (or the
    # caller's own dict) between calls.
    acq_chans = dict(acq_chans)

    # acc values are only requested from the driver when the caller wants them
    acc_chans = self.loaded_channels if return_acc else []

    trig_delay_clks = int(trig_delay/CLK_PERIOD)
    acq_data, acc_data = self._pl_driver.run_prog_acq(n_total_shots, nsamples, acq_chans, acc_chans,
                                                      trig_delay_clks, decimator)

    if from_server:
        for ch in acq_data.keys():
            acq_data[ch] = acq_data[ch].tobytes()
        for ch in acc_data.keys():
            acc_data[ch] = acc_data[ch].tobytes()

    if return_acc:
        return acq_data, acc_data
    return acq_data

`run_circuit_batch(raw_asm_list, n_total_shots, reads_per_shot=1, timeout_per_shot=8, reload_cmd=True, reload_freq=True, reload_env=True, zero_between_reload=True, from_server=False)`

Runs a batch of circuits given by a list of raw_asm "binaries". Each circuit is run n_total_shots times. reads_per_shot and n_total_shots are passed directly into run_circuit, and must be the same for all circuits in the batch. The parameters reload_cmd, reload_freq, reload_env, and zero_between_reload control which of these fields is rewritten circuit-to-circuit (everything is rewritten initially). Leave these all at True (default) for maximum safety, to ensure that QubiC is in a clean state before each run. Depending on the circuits, some of these can be turned off to save time.

TODO: consider throwing some version of all the args here into a BatchedCircuitRun or somesuch object

Parameters:

Name	Type	Description	Default
`raw_asm_list`	`List[Dict]`	list of raw_asm binaries to run	required
`n_total_shots`	`int`	number of shots per circuit	required
`reads_per_shot`	`int`	number of values per shot per channel to read back from accbuf. If dict, indexed by str(channel_number) (same indices as raw_asm_list). If int, assumed to be the same across channels. Unless multiple circuits were rastered pre-compilation or there is mid-circuit measurement involved this is typically 1	`1`
`timeout_per_shot`	`float`	job will time out if time to take a single shot exceeds this value in seconds (this likely means the job is hanging due to timing issues in the program or gateware)	`8`
`reload_cmd`	`bool`	if True, reload command buffer between circuits	`True`
`reload_freq`	`bool`	if True, reload freq buffer between circuits	`True`
`reload_env`	`bool`	if True, reload env buffer between circuits	`True`
`from_server`	`bool`	set to true if calling over RPC. If True, pack returned s11 arrays into byte objects	`False`

Returns:

Name	Type	Description
`dict`		Complex IQ shots for each accbuf in chanlist; each array has shape `(len(raw_asm_list), n_total_shots, reads_per_shot)`

Source code in qubic/run.py

def run_circuit_batch(self, 
                      raw_asm_list: List[Dict], 
                      n_total_shots: int, 
                      reads_per_shot: int = 1, 
                      timeout_per_shot: float = 8,
                      reload_cmd: bool = True, 
                      reload_freq: bool = True, 
                      reload_env: bool = True, 
                      zero_between_reload: bool = True,
                      from_server: bool = False):
    """
    Load and run each raw_asm "binary" in `raw_asm_list` in turn, taking 
    `n_total_shots` shots of every circuit. `n_total_shots`, `reads_per_shot` and 
    `timeout_per_shot` are forwarded unchanged to `run_circuit`, so they are 
    shared by all circuits in the batch.

    The first circuit is always loaded with every load option enabled. For 
    subsequent circuits, `reload_cmd`, `reload_freq`, `reload_env` and 
    `zero_between_reload` select what is rewritten. Leave these all at `True` 
    (default) for maximum safety, to ensure that QubiC is in a clean state 
    before each run. Depending on the circuits, some of these can be turned 
    off to save time.

    TODO: consider throwing some version of all the args here into a BatchedCircuitRun or somesuch
    object

    Parameters
    ----------
    raw_asm_list: list
        list of raw_asm binaries to run
    n_total_shots: int
        number of shots per circuit
    reads_per_shot: int | dict
        number of values per shot per channel to read back from accbuf. If dict, indexed
        by str(channel_number) (same indices as raw_asm_list). If int, assumed to be 
        the same across channels. Unless multiple circuits were rastered pre-compilation or 
        there is mid-circuit measurement involved this is typically 1
    timeout_per_shot: float
        job will time out if time to take a single shot exceeds this value in seconds 
        (this likely means the job is hanging due to timing issues in the program or gateware)
    reload_cmd: bool
        if True, reload command buffer between circuits
    reload_freq: bool
        if True, reload freq buffer between circuits
    reload_env: bool
        if True, reload env buffer between circuits
    zero_between_reload: bool
        passed as the `zero` argument of `load_circuit` for every circuit 
        after the first
    from_server: bool
        set to true if calling over RPC. If True, pack returned s11 arrays into
        byte objects

    Returns
    -------
    dict:
        Complex IQ shots keyed by channel, for the union of channels appearing 
        in `raw_asm_list`; each array has shape 
        `(len(raw_asm_list), n_total_shots, reads_per_shot)`. Arrays start out 
        filled with NaN, so slots for which `run_circuit` returned no data 
        for that channel stay NaN.
    """
    log = logging.getLogger(__name__)

    # union of all proc channels in batch
    channels = set()
    for prog in raw_asm_list:
        channels |= set(prog.keys())

    if isinstance(reads_per_shot, int):
        reads_per_shot = dict.fromkeys(channels, reads_per_shot)

    # NaN-filled result buffers, one per channel
    s11 = {}
    for ch in channels:
        buf_shape = (len(raw_asm_list), n_total_shots, reads_per_shot[ch])
        s11[ch] = np.nan*np.zeros(buf_shape, dtype=np.complex128)

    for i, raw_asm in enumerate(tqdm(raw_asm_list)):
        log.info(f'starting circuit {i}/{len(raw_asm_list)-1}')

        if i == 0:
            # the first circuit always gets a complete load
            self.load_circuit(raw_asm, True, True, True, True)
        else:
            self.load_circuit(raw_asm, 
                              zero=zero_between_reload, 
                              load_commands=reload_cmd,
                              load_freqs=reload_freq, 
                              load_envs=reload_env)

        circuit_shots = self.run_circuit(n_total_shots, reads_per_shot, timeout_per_shot)
        for ch, shots in circuit_shots.items():
            s11[ch][i] = shots

    if from_server:
        s11 = {ch: shots.tobytes() for ch, shots in s11.items()}

    log.info('batch finished')
    return s11

`zero_command_buf(core_keys=None)`

Loads command memory with dummy asm program: reset phase, output done signal, then idle. This is useful/necessary if a new program is loaded on a subset of cores such that the previous program is not completely overwritten (e.g. you are loading a program that runs only on core 2, and the previous program used cores 2 and 3).

Parameters:

Name	Type	Description	Default
`core_keys`	`List[str \| int]`	list of channels (proc cores) to load. Defaults to all channels in currently loaded gateware.	`None`

Source code in qubic/run.py

def zero_command_buf(self, core_keys: List[str | int] = None):
    """
    Overwrite the command memory of the given cores with a dummy asm 
    program (reset phase, output done signal, then idle) and mark those 
    cores as no longer loaded. This is useful/necessary if a new program 
    is loaded on a subset of cores such that the previous program is not 
    completely overwritten (e.g. you are loading a program that runs only 
    on core 2, and the previous program used cores 2 and 3).

    Parameters
    ----------
    core_keys: list
        list of channels (proc cores) to load. Defaults to
        all channels in currently loaded gateware, i.e. 
        `str(i)` for `i` in `range(self._pl_driver.nproc)`.
    """
    if core_keys is None:
        core_keys = list(map(str, range(self._pl_driver.nproc)))

    # assemble the dummy program once; the same command buffer goes to every core
    elem_cfg = hw.RFSoCElementCfg(16, 16)
    dummy_asm = am.SingleCoreAssembler([elem_cfg] * 3)
    dummy_asm.add_phase_reset()
    dummy_asm.add_done_stb()
    dummy_cmd, _, _ = dummy_asm.get_compiled_program()

    for key in core_keys:
        self.load_command_buf(key, dummy_cmd)

    # load_command_buf registers every core it writes to; undo that here,
    # since a zeroed core holds no real program
    for key in core_keys:
        if key in self.loaded_channels:
            self.loaded_channels.remove(key)