dnl Process this m4 file to produce 'C' language file.
dnl
dnl If you see this line, you can ignore the next one.
/* Do not edit this file. It is produced from the corresponding .m4 source */
dnl
/*
 *  Copyright (C) 2003, Northwestern University and Argonne National Laboratory
 *  See COPYRIGHT notice in top-level directory.
 */
/* $Id: ncmpio_i_getput.m4 3613 2017-12-18 01:02:56Z wkliao $ */

/*
 * This file implements the corresponding APIs defined in
 * src/dispatchers/var_getput.m4
 *
 * ncmpi_iget_var<kind>()        : dispatcher->iget_var()
 * ncmpi_iput_var<kind>()        : dispatcher->iput_var()
 * ncmpi_iget_var<kind>_<type>() : dispatcher->iget_var()
 * ncmpi_iput_var<kind>_<type>() : dispatcher->iput_var()
 */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <stdio.h>
#include <unistd.h>
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#include <limits.h> /* INT_MAX */
#include <assert.h>

#include <string.h> /* memcpy() */
#include <mpi.h>

#include <pnc_debug.h>
#include <common.h>
#include "ncmpio_NC.h"

/*----< abuf_malloc() >------------------------------------------------------*/
/* Reserve nbytes at the current end of the attached buffer pool and register
 * the reservation in the occupy table.
 *
 * ncp        [in/out] file object whose attached buffer (ncp->abuf) is used
 * nbytes     [in]     number of bytes to reserve. This function does not
 *                     check it against the pool size; the caller must have
 *                     verified that the pool has enough space left.
 * buf        [out]    start address of the reserved space inside the pool
 * abuf_index [out]    occupy-table entry assigned to this reservation, to be
 *                     handed back to abuf_dealloc()
 *
 * Returns NC_NOERR on success, or NC_ENOMEM when the occupy table cannot be
 * enlarged. On failure the pool and the table are left untouched.
 */
static int
abuf_malloc(NC *ncp, MPI_Offset nbytes, void **buf, int *abuf_index)
{
    /* extend the table size if more entries are needed */
    if (ncp->abuf->tail + 1 == ncp->abuf->table_size) {
        /* Grow through a temporary pointer: if the reallocation fails the
         * existing table stays valid and table_size still matches it.
         */
        NC_buf_status *grown = (NC_buf_status*)
                   NCI_Realloc(ncp->abuf->occupy_table,
                   ((size_t)ncp->abuf->table_size +
                    NC_ABUF_DEFAULT_TABLE_SIZE) * sizeof(NC_buf_status));
        if (grown == NULL) DEBUG_RETURN_ERROR(NC_ENOMEM)

        ncp->abuf->occupy_table = grown;
        ncp->abuf->table_size  += NC_ABUF_DEFAULT_TABLE_SIZE;
    }

    /* mark the new entry as used and store the requested buffer size */
    ncp->abuf->occupy_table[ncp->abuf->tail].is_used  = 1;
    ncp->abuf->occupy_table[ncp->abuf->tail].req_size = nbytes;
    *abuf_index = ncp->abuf->tail;

    /* the reserved space starts right after the bytes already handed out */
    *buf = (char*)ncp->abuf->buf + ncp->abuf->size_used;
    ncp->abuf->size_used += nbytes;
    ncp->abuf->tail++;

    return NC_NOERR;
}

/*----< abuf_dealloc() >-----------------------------------------------------*/
/* Give back (un-register) the most recent reservation made from the attached
 * buffer pool. Only the tail entry of the occupy table can be released, which
 * is enforced by an assertion. Always returns NC_NOERR.
 */
static int
abuf_dealloc(NC *ncp, int abuf_index)
{
    NC_buf_status *entry;

    /* the entry being released must be the last one registered */
    assert(abuf_index == ncp->abuf->tail - 1);

    entry = ncp->abuf->occupy_table + abuf_index;

    /* return the bytes to the pool, then clear the table entry */
    ncp->abuf->size_used -= entry->req_size;
    entry->is_used  = 0;
    entry->req_size = 0;

    /* the tail moves back by one entry */
    ncp->abuf->tail--;

    return NC_NOERR;
}

/*----< add_record_requests() >----------------------------------------------*/
/* Split a request that spans several records into one request per record.
 * reqs[0] is the lead request and already holds the full start/count/stride.
 * For each additional record a new entry reqs[1], reqs[2], ... is filled in,
 * each with its own start/count/stride arrays and a count of 1 along the
 * record dimension. The caller must provide room for count[0] entries in
 * reqs. From here on, every entry can be treated like a request to a
 * non-record variable. Always returns NC_NOERR.
 */
static int
add_record_requests(NC_var           *varp,
                    NC_req           *reqs,
                    const MPI_Offset *stride)
{
    int         rec, d;
    size_t      alloc_len;
    MPI_Offset  nrecs, rec_nelems=1, rec_nbytes;
    NC_req     *lead=reqs, *cur;

    /* number of elements in one record, and its size using varp->xsz bytes
     * per element */
    for (d=1; d<varp->ndims; d++)
        rec_nelems *= lead->count[d];
    rec_nbytes = varp->xsz * rec_nelems;

    /* one allocation holds start and count, plus stride when it is given */
    alloc_len  = (size_t)varp->ndims * SIZEOF_MPI_OFFSET;
    alloc_len *= (stride != NULL) ? 3 : 2;

    /* the lead count[0] is only reset after the loop, so read it once */
    nrecs = lead->count[0];

    for (rec=1; rec<nrecs; rec++) {
        cur = reqs + rec;

        /* start from a copy of the lead request, then replace the fields
         * that differ, including the ones that need their own memory */
        *cur = *lead;

        cur->start = (MPI_Offset*) NCI_Malloc(alloc_len);
        cur->count = cur->start + varp->ndims;

        /* record dimension: this entry covers exactly one record */
        cur->count[0] = 1;
        if (stride == NULL) {
            cur->stride   = NULL;
            cur->start[0] = lead->start[0] + rec;
        }
        else {
            cur->stride    = cur->count + varp->ndims;
            cur->start[0]  = lead->start[0] + stride[0] * rec;
            cur->stride[0] = lead->stride[0];
        }

        /* all other dimensions are the same as in the lead request */
        for (d=1; d<varp->ndims; d++) {
            cur->start[d] = lead->start[d];
            cur->count[d] = lead->count[d];
            if (stride != NULL)
                cur->stride[d] = lead->stride[d];
        }

        /* each record buffer follows the one of the previous record;
         * xbuf of the lead request is expected to be non-NULL */
        cur->bnelems  = rec_nelems;
        cur->buf      = (char*)((cur-1)->buf)  + rec_nbytes;
        cur->xbuf     = (char*)((cur-1)->xbuf) + rec_nbytes;
        cur->num_recs = 0;  /* marks an entry that is not the lead request */

        /* bufcount and buftype of this entry are plain copies of the lead
         * request; only the ones of the lead request matter in the wait
         * call */
    }

    /* finally shrink the lead request to its first record only */
    lead->bnelems  = rec_nelems;
    lead->count[0] = 1;

    return NC_NOERR;
}

/*----< ncmpio_igetput_varm() >-----------------------------------------------*/
int
ncmpio_igetput_varm(NC               *ncp,
                    NC_var           *varp,
                    const MPI_Offset  start[],
                    const MPI_Offset  count[],
                    const MPI_Offset  stride[],
                    const MPI_Offset  imap[],
                    void             *buf,      /* user buffer */
                    MPI_Offset        bufcount,
                    MPI_Datatype      buftype,
                    int              *reqid,    /* out, can be NULL */
                    int               reqMode,
                    int               isSameGroup) /* if part of a varn group */
{
    void *xbuf=NULL;
    int err=NC_NOERR;
    int i, abuf_index=-1, el_size, buftype_is_contig;
    int need_convert, need_swap, need_swap_back_buf=0;
    size_t  dims_chunk;
    MPI_Offset bnelems=0, nbytes;
    MPI_Datatype ptype, imaptype;
    NC_req *req;

    /* decode buftype to obtain the followings:
     * ptype:    element data type (MPI primitive type) in buftype
     * bufcount: If it is -1, then this is called from a high-level API and in
     *           this case buftype will be an MPI primitive data type.
     *           If it is >=0, then this is called from a flexible API.
     * bnelems:  number of ptypes in user buffer, buf
     * nbytes:   number of bytes (in external data representation) to read from
     *           or write to the file
     * el_size:  byte size of ptype
     * buftype_is_contig: whether buftype is contiguous
     */
    err = ncmpii_buftype_decode(varp->ndims, varp->xtype, count, bufcount,
                                buftype, &ptype, &el_size, &bnelems,
                                &nbytes, &buftype_is_contig);
    if (err != NC_NOERR) return err;

#ifndef ENABLE_LARGE_REQ
    if (nbytes > INT_MAX) DEBUG_RETURN_ERROR(NC_EMAX_REQ)
#endif

    if (bnelems == 0) {
        /* zero-length request, mark this as a NULL request */
        if (!isSameGroup && reqid != NULL)
            /* only if this is not part of a group request */
            *reqid = NC_REQ_NULL;
        return NC_NOERR;
    }

    /* check if type conversion and Endianness byte swap is needed */
    need_convert = ncmpii_need_convert(ncp->format, varp->xtype, ptype);
    need_swap    = ncmpii_need_swap(varp->xtype, ptype);

    /* check whether this is a true varm call, if yes, imaptype will be a
     * newly created MPI derived data type, otherwise MPI_DATATYPE_NULL
     */
    err = ncmpii_create_imaptype(varp->ndims, count, imap, ptype, &imaptype);
    if (err != NC_NOERR) return err;

    if (fIsSet(reqMode, NC_REQ_WR)) { /* pack request to xbuf */
#if 1
        /* when user buf is used as xbuf, we need to byte-swap buf
         * back to its original contents */
        xbuf = buf;
        need_swap_back_buf = 1;

        if (fIsSet(reqMode, NC_REQ_NBB)) {
            /* for bput call, check if the remaining buffer space is sufficient
             * to accommodate this request and obtain a space for xbuf
             */
            if (ncp->abuf->size_allocated - ncp->abuf->size_used < nbytes)
                DEBUG_RETURN_ERROR(NC_EINSUFFBUF)
            err = abuf_malloc(ncp, nbytes, &xbuf, &abuf_index);
            if (err != NC_NOERR) return err;
            need_swap_back_buf = 0;
        }
        else {
            if (!buftype_is_contig || imaptype != MPI_DATATYPE_NULL ||
                need_convert ||
#ifdef DISABLE_IN_PLACE_SWAP
                need_swap
#else
                nbytes <= NC_BYTE_SWAP_BUFFER_SIZE
#endif
            ) {
                xbuf = NCI_Malloc((size_t)nbytes);
                if (xbuf == NULL) DEBUG_RETURN_ERROR(NC_ENOMEM)
                need_swap_back_buf = 0;
            }
        }

        /* pack user buffer, buf, to xbuf which will be used to write to file */
        err = ncmpio_pack_xbuf(ncp->format, varp, bufcount, buftype,
                               buftype_is_contig, bnelems, ptype, imaptype,
                               need_convert, need_swap, nbytes, buf, xbuf);
        if (err != NC_NOERR && err != NC_ERANGE) {
            if (fIsSet(reqMode, NC_REQ_NBB)) abuf_dealloc(ncp, abuf_index);
            else                             NCI_Free(xbuf);
            return err;
        }
#else
        void *cbuf=NULL, *lbuf=NULL;
        int position;

        /* attached buffer allocation logic
         * if (fIsSet(reqMode, NC_REQ_NBB))
         *     if contig && no imap && no convert
         *         buf   ==   lbuf   ==   cbuf    ==     xbuf memcpy-> abuf
         *                                               abuf
         *     if contig && no imap &&    convert
         *         buf   ==   lbuf   ==   cbuf convert-> xbuf == abuf
         *                                               abuf
         *     if contig &&    imap && no convert
         *         buf   ==   lbuf pack-> cbuf    ==     xbuf == abuf
         *                                abuf
         *     if contig &&    imap &&    convert
         *         buf   ==   lbuf pack-> cbuf convert-> xbuf == abuf
         *                                               abuf
         *  if noncontig && no imap && no convert
         *         buf pack-> lbuf   ==   cbuf    ==     xbuf == abuf
         *                    abuf
         *  if noncontig && no imap &&    convert
         *         buf pack-> lbuf   ==   cbuf convert-> xbuf == abuf
         *                                               abuf
         *  if noncontig &&    imap && no convert
         *         buf pack-> lbuf pack-> cbuf    ==     xbuf == abuf
         *                                abuf
         *  if noncontig &&    imap &&    convert
         *         buf pack-> lbuf pack-> cbuf convert-> xbuf == abuf
         *                                               abuf
         */

        MPI_Offset ibufsize = bnelems * el_size;
        if (ibufsize != (int)ibufsize) DEBUG_RETURN_ERROR(NC_EINTOVERFLOW)

        /* Step 1: if buftype is not contiguous, i.e. a noncontiguous MPI
         * derived datatype, pack buf into a contiguous buffer, lbuf,
         */
        if (!buftype_is_contig) { /* buftype is not contiguous */
            if (imaptype == MPI_DATATYPE_NULL && !need_convert)
                /* in this case, lbuf will later become xbuf */
                lbuf = xbuf;
            else {
                /* in this case, allocate lbuf and it will be freed before
                 * constructing xbuf */
                lbuf = NCI_Malloc((size_t)ibufsize);
                if (lbuf == NULL) DEBUG_RETURN_ERROR(NC_ENOMEM)
            }

            /* pack buf into lbuf based on buftype */
            if (bufcount > INT_MAX) DEBUG_RETURN_ERROR(NC_EINTOVERFLOW)
            position = 0;
            MPI_Pack(buf, (int)bufcount, buftype, lbuf, (int)ibufsize,
                     &position, MPI_COMM_SELF);
        }
        else /* for contiguous case, we reuse buf */
            lbuf = buf;

        /* Step 2: if imap is non-contiguous, pack lbuf to cbuf */
        if (imaptype != MPI_DATATYPE_NULL) { /* true varm */
            if (!need_convert)
                /* in this case, cbuf will later become xbuf */
                cbuf = xbuf;
            else {
                /* in this case, allocate cbuf and cbuf will be freed before
                 * constructing xbuf */
                cbuf = NCI_Malloc((size_t)ibufsize);
                if (cbuf == NULL) DEBUG_RETURN_ERROR(NC_ENOMEM)
            }

            /* pack lbuf to cbuf based on imaptype */
            position = 0;
            MPI_Pack(lbuf, 1, imaptype, cbuf, (int)ibufsize, &position,
                     MPI_COMM_SELF);
            MPI_Type_free(&imaptype);

            /* lbuf is no longer needed */
            if (lbuf != buf) NCI_Free(lbuf);
        }
        else /* not a true varm call: reuse lbuf */
            cbuf = lbuf;

        /* Step 3: type-convert and byte-swap cbuf to xbuf, and xbuf will be
         * used in MPI write function to write to file
         */
        if (need_convert) {
            /* user buf type does not match nc var type defined in file */
            void *fillp; /* fill value in internal representation */

            /* find the fill value */
            fillp = NCI_Malloc((size_t)varp->xsz);
            ncmpio_inq_var_fill(varp, fillp);

            /* datatype conversion + byte-swap from cbuf to xbuf */
            DATATYPE_PUT_CONVERT(ncp->format, varp->xtype, xbuf, cbuf, bnelems,
                                 ptype, fillp, err)
            NCI_Free(fillp);

            /* The only error codes returned from DATATYPE_PUT_CONVERT are
             * NC_EBADTYPE or NC_ERANGE. Bad varp->xtype and itype have been
             * sanity checked at the dispatchers, so NC_EBADTYPE is not
             * possible. Thus, the only possible error is NC_ERANGE.
             * NC_ERANGE can be caused by one or more elements of buf that is
             * out of range representable by the external data type, it is not
             * considered a fatal error. The request must continue to finish.
             */
            if (cbuf != buf) NCI_Free(cbuf);
#if 0
            if (err != NC_NOERR && err != NC_ERANGE) {
                if (fIsSet(reqMode, NC_REQ_NBB)) abuf_dealloc(ncp, abuf_index);
                else                             NCI_Free(xbuf);
                return err;
            }
#endif
        }
        else {
/*
            if (xbuf == NULL) xbuf = cbuf;
            else if (cbuf == buf) memcpy(xbuf, cbuf, (size_t)nbytes);
*/
            if (cbuf == buf && xbuf != buf) memcpy(xbuf, cbuf, (size_t)nbytes);

            if (need_swap) {
                /* perform array in-place byte swap on xbuf */
                ncmpii_in_swapn(xbuf, bnelems, varp->xsz);

                if (xbuf == buf) need_swap_back_buf = 1;
                /* when user buf is used as xbuf, we need to byte-swap buf
                 * back to its original contents */
            }
        }
#endif
    }
    else { /* read request */
        /* Type conversion and byte swap for read are done at wait call, we
         * need bnelems to reverse the steps as done in write case
         */
        if (buftype_is_contig && imaptype == MPI_DATATYPE_NULL && !need_convert)
            xbuf = buf;  /* there is no buffered read (bget_var, etc.) */
        else
            xbuf = NCI_Malloc((size_t)nbytes);
    }

    if (fIsSet(reqMode, NC_REQ_WR)) {
        /* allocate write/read request array */
        if (ncp->numPutReqs % NC_REQUEST_CHUNK == 0)
            ncp->put_list = (NC_req*) NCI_Realloc(ncp->put_list,
                            ((size_t)ncp->numPutReqs + NC_REQUEST_CHUNK) *
                            sizeof(NC_req));
        req = ncp->put_list + ncp->numPutReqs;

        /* the new request ID will be an even number (max of write ID + 2) */
        req->id = 0;
        if (ncp->numPutReqs > 0)
            req->id = ncp->put_list[ncp->numPutReqs-1].id + 2;

        ncp->numPutReqs++;
    }
    else {  /* read request */
        /* allocate write/read request array */
        if (ncp->numGetReqs % NC_REQUEST_CHUNK == 0)
            ncp->get_list = (NC_req*) NCI_Realloc(ncp->get_list,
                            ((size_t)ncp->numGetReqs + NC_REQUEST_CHUNK) *
                            sizeof(NC_req));
        req = ncp->get_list + ncp->numGetReqs;

        /* the new request ID will be an odd number (max of read ID + 2) */
        req->id = 1;
        if (ncp->numGetReqs > 0)
            req->id = ncp->get_list[ncp->numGetReqs-1].id + 2;

        ncp->numGetReqs++;
    }

    /* if isSameGroup, then this request is from i_varn API */
    if (isSameGroup && reqid != NULL)
        req->id = *reqid;

    req->flag = 0;
    if (buftype_is_contig)  fSet(req->flag, NC_BUFTYPE_IS_CONTIG);
    if (need_swap_back_buf) fSet(req->flag, NC_NEED_SWAP_BACK_BUF);

    req->varp        = varp;
    req->buf         = buf;
    req->xbuf        = xbuf;
    req->bnelems     = bnelems;
    req->bufcount    = bufcount;
    req->ptype       = ptype;
    req->imaptype    = imaptype;
    req->abuf_index  = abuf_index;
    req->tmpBuf      = NULL;
    req->userBuf     = NULL;
    req->status      = NULL;
    req->num_recs    = 1;   /* For record variable, this will be set to
                                    * the number of records requested. For
                                    * fixed-size variable, this will be 1.
                                    */

    /* only when read and buftype is not contiguous, we duplicate buftype for
     * later in the wait call to unpack buffer based on buftype
     */
    if (fIsSet(reqMode, NC_REQ_RD) && !buftype_is_contig)
        MPI_Type_dup(buftype, &req->buftype);
    else
        req->buftype = MPI_DATATYPE_NULL;

    /* allocate start/count/stride arrays */
    dims_chunk = (size_t)varp->ndims * SIZEOF_MPI_OFFSET;
    if (stride != NULL)
        req->start = (MPI_Offset*) NCI_Malloc(dims_chunk*3);
    else
        req->start = (MPI_Offset*) NCI_Malloc(dims_chunk*2);

    req->count = req->start + varp->ndims;

    if (stride != NULL)
        req->stride = req->count + varp->ndims;
    else
        req->stride = NULL;

    /* set the values for start/count/stride */
    for (i=0; i<varp->ndims; i++) {
        req->start[i] = start[i];
        req->count[i] = count[i];
        if (stride != NULL)
            req->stride[i] = stride[i];
    }

    /* if this is a record variable and number of requesting records is > 1,
     * we split the request, one for each record
     */
    if (IS_RECVAR(varp) && req->count[0] > 1) {
        req->num_recs = req->count[0];

        add_record_requests(varp, req, stride);
        /* req->count[0] has been changed to 1 */

        if (fIsSet(reqMode, NC_REQ_WR)) ncp->numPutReqs += req->num_recs - 1;
        else                            ncp->numGetReqs += req->num_recs - 1;
    }

    /* return the request ID */
    if (reqid != NULL) *reqid = req->id;

    return err;
}

include(`utils.m4')dnl
dnl
dnl IGETPUT_API(get/put)
dnl
define(`IGETPUT_API',dnl
`dnl
/*----< ncmpio_i$1_var() >---------------------------------------------------*/
/* Nonblocking $1 entry point of this driver. It looks up the variable object
 * for varid and forwards all arguments to ncmpio_igetput_varm(), which posts
 * the request. The return value is the one of ncmpio_igetput_varm().
 *
 * start  can be NULL only when api is NC_VAR
 * count  can be NULL only when api is NC_VAR or NC_VAR1
 * stride can be NULL only when api is NC_VAR, NC_VAR1, or NC_VARA
 * imap   can be NULL only when api is NC_VAR, NC_VAR1, NC_VARA, or NC_VARS
 * bufcount is >= 0 when called from a flexible API, is -1 when called from a
 *         high-level API and in this case buftype is an MPI primitive
 *         datatype.
 * buftype is an MPI primitive data type (corresponding to the internal data
 *         type of buf, e.g. short in ncmpi_put_short is mapped to MPI_SHORT)
 *         if called from a high-level APIs. When called from a flexible API
 *         it can be an MPI derived data type or MPI_DATATYPE_NULL. If it is
 *         MPI_DATATYPE_NULL, then it means the data type of buf in memory
 *         matches the variable external data type. In this case, bufcount is
 *         ignored.
 * reqid   receives the ID of the posted request.
 * reqMode indicates modes (NC_REQ_COLL/NC_REQ_INDEP/NC_REQ_WR etc.)
 */
int
ncmpio_i$1_var(void             *ncdp,
               int               varid,
               const MPI_Offset *start,
               const MPI_Offset *count,
               const MPI_Offset *stride,
               const MPI_Offset *imap,
               ifelse(`$1',`put',`const') void *buf,
               MPI_Offset        bufcount,
               MPI_Datatype      buftype,
               int              *reqid,
               int               reqMode)
{
    NC *ncp=(NC*)ncdp;

    /* Note sanity check for ncdp and varid has been done in dispatchers */

    /* buf is cast to a plain void pointer because ncmpio_igetput_varm()
     * takes a non-const buffer for both put and get. The last argument, 0,
     * is isSameGroup: this request is not part of a varn group.
     */
    return ncmpio_igetput_varm(ncp, ncp->vars.value[varid], start, count,
                               stride, imap, (void*)buf, bufcount, buftype,
                               reqid, reqMode, 0);
}
')dnl
dnl

IGETPUT_API(put)
IGETPUT_API(get)

