/* msm-exa.c
 *
 * Copyright (c) 2009, Code Aurora Forum. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of Code Aurora nor
 *       the names of its contributors may be used to endorse or promote
 *       products derived from this software without specific prior written
 *       permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <sys/ioctl.h>
#include <sys/time.h>

#include "xf86.h"
#include "exa.h"

#include "msm.h"
#include "msm-drm.h"
#include "msm-render.h"

/* Return the number of microseconds between *start and *end.
 *
 * When *end lies in a later second than *start the whole seconds are
 * included; otherwise only the two microsecond fields are subtracted.
 * The result is truncated to unsigned int.
 */
unsigned int elapsed(struct timeval *start, struct timeval *end)
{
    unsigned int usec;

    /* Same (or earlier) second: only the microsecond fields matter */
    if (end->tv_sec <= start->tv_sec)
        return end->tv_usec - start->tv_usec;

    /* Whole seconds strictly between the two timestamps ... */
    usec = (end->tv_sec - start->tv_sec  - 1) * 1000000;

    /* ... plus the tail of the first second and the head of the last */
    usec += (1000000 - start->tv_usec) + end->tv_usec;

    return usec;
}

/* Specify how many blits we will execute at one time.
 * NOTE(review): not referenced in the part of the file reviewed here;
 * confirm whether it is still used. */
#define MSM_MAX_BLITS 16

/* Useful macros */

/* Short aliases for IntToxFixed() / xFixedToInt(), used when building
 * and reading back the vector handed to PictureTransformPoint(). */
#define FIXED(_x)  IntToxFixed(_x)
#define INT(_x)    xFixedToInt(_x)

/* Fetch the MSMPtr of the screen that pixmap _x belongs to, by going
 * through the pixmap's ScreenPtr and the xf86Screens[] table. */
#define MSMPTR_FROM_PIXMAP(_x)         \
	MSMPTR(xf86Screens[(_x)->drawable.pScreen->myNum])

/* This is a local scratch structure used to store information.
 * The Prepare and Check hooks fill it in; the Solid, Copy and Composite
 * hooks that run afterwards read it back.  There is a single instance
 * for the whole file. */
static struct
{
    /* Source pixmap dimensions (drawable.width / drawable.height) */
    unsigned int src_width;
    unsigned int src_height;
    /* Source handle: the value returned by msm_get_pixmap_bo() cast to
     * unsigned long when src_flags is MSM_BLIT_GEM, otherwise the value
     * returned by msm_pixmap_offset() */
    unsigned long src_priv;
    /* Source pitch as returned by msm_pixmap_get_pitch() */
    int src_pitch;
    /* MDP_* format of the source */
    int src_format;
    /* MSM_BLIT_GEM or MSM_BLIT_FB */
    int src_flags;

    /* Destination offset from msm_pixmap_offset(), set for solid fills */
    int dst_priv;
    /* MDP_* format of the destination, set by MSMCheckComposite() */
    int dst_format;
    /* Foreground pixel value used by solid fills */
    Pixel dst_fg_color;

    /* Bitmask of EXA_SCRATCH_* values */
    unsigned int flags;
    /* Transform of the source picture (may be NULL) */
    PictTransformPtr transform;

    /* repeatType copied from the source picture */
    int repeatType;
} exaScratch;

/* Defines for exaScratch.flags */

/* Set by MSMPrepareComposite() when the source picture has repeat
 * enabled; MSMDoComposite() keeps blitting tiles while it is set. */
#define EXA_SCRATCH_REPEAT 0x01

/* Set to TRUE to create verbose error messages for MSM BLIT failures.
 * NOTE(review): no user of this macro is visible in the part of the
 * file reviewed here. */
#define DEBUG_MSM_BLIT (TRUE)

/* Translation table from Render picture formats (PICT_*) to the pixel
 * formats used by msm_fb (MDP_*).  Both 32-bit picture formats map to
 * MDP_XRGB_8888.
 */

static struct
{
    int pictFormat;
    int msmFormat;
} msmFormats[] = {
    { .pictFormat = PICT_a8r8g8b8, .msmFormat = MDP_XRGB_8888 },
    { .pictFormat = PICT_x8r8g8b8, .msmFormat = MDP_XRGB_8888 },
    { .pictFormat = PICT_r8g8b8,   .msmFormat = MDP_RGB_888 },
    { .pictFormat = PICT_r5g6b5,   .msmFormat = MDP_RGB_565 },
    { .pictFormat = PICT_b5g6r5,   .msmFormat = MDP_BGR_565 },
};

/* Map the format of picture p to an MDP format by scanning msmFormats[].
 * Returns MDP_IMGTYPE_LIMIT, which callers treat as "unsupported", when
 * the picture format is not in the table.
 */

static int
msm_lookup_format(PicturePtr p)
{
    int idx = 0;

    while (idx < ARRAY_SIZE(msmFormats)) {
        if (p->format == msmFormats[idx].pictFormat)
            return msmFormats[idx].msmFormat;
        idx++;
    }

    /* No match: hand back the sentinel */
    return MDP_IMGTYPE_LIMIT;
}

/* EXA PrepareSolid hook: record the destination offset and fill colour
 * for the MSMSolid() calls that follow.
 *
 * Returns FALSE (leaving the fill to the caller) when FastFill is
 * disabled, when the operation is not a plain GXcopy with a full
 * planemask, or when the destination is not 16 or 32 bits per pixel.
 */

static Bool
MSMPrepareSolid(PixmapPtr pxDst, int alu, Pixel planemask, Pixel fg)
{
    MSMPtr pMsm = MSMPTR_FROM_PIXMAP(pxDst);

    if (!pMsm->FastFill)
        return FALSE;

    /* TODO: Support GXSet and GXClear for optimizing xterm background? */
    if (alu != GXcopy || planemask != ~0U)
        return FALSE;

    switch (pxDst->drawable.bitsPerPixel) {
    case 16:
    case 32:
        exaScratch.dst_priv = msm_pixmap_offset(pxDst);
        exaScratch.dst_fg_color = fg;
        return TRUE;

    default:
        /* FIXME: 24bpp is not supported yet, since it's not so easy. */
        return FALSE;
    }
}

/* EXA Solid hook: fill the rectangle (x1,y1)-(x2,y2) of pxDst through
 * swFill(), using the offset and colour stored by MSMPrepareSolid().
 */

void MSMSolid(PixmapPtr pxDst, int x1, int y1, int x2, int y2)
{
    /* TODO: everything derived from pxDst below could be computed once
       in MSMPrepareSolid() instead of once per rectangle. */

    MSMPtr pMsm = MSMPTR_FROM_PIXMAP(pxDst);
    int surfaceWidth = pxDst->drawable.width;
    int depthBits = pxDst->drawable.bitsPerPixel;
    int fillWidth = x2 - x1;
    int fillHeight = y2 - y1;

    swFill(pMsm, exaScratch.dst_priv, surfaceWidth,
           x1, y1, fillWidth, fillHeight, exaScratch.dst_fg_color,
           depthBits, !(pMsm->NoSigBlock));
}


/* EXA DoneSolid hook: there is currently nothing to finish after a
 * solid fill. */

void MSMDoneSolid(PixmapPtr pxDst)
{
    (void) pxDst;
}

/* EXA PrepareCopy hook: validate the request and remember the source
 * surface for the MSMDoCopy() calls that follow.
 *
 * Only GXcopy with a full planemask is accepted, source and destination
 * must have the same bits per pixel, and that value must be 16, 24 or
 * 32.  Returns FALSE otherwise.
 */

static Bool
MSMPrepareCopy(PixmapPtr pxSrc, PixmapPtr pxDst, int dx, int dy,
	       int alu, Pixel planemask)
{
    int bpp;
    int format;

    /* FIXME:  Do we support other raster operations? */
    if (alu != GXcopy || planemask != ~0U)
        return FALSE;

    bpp = pxSrc->drawable.bitsPerPixel;

    /* Source and destination must share the same depth */
    if (bpp != pxDst->drawable.bitsPerPixel)
        return FALSE;

    /* Pick the MDP format that matches the depth */
    if (bpp == 32)
        format = MDP_XRGB_8888;
    else if (bpp == 24)
        format = MDP_RGB_888;
    else if (bpp == 16)
        format = MDP_RGB_565;
    else
        return FALSE;

    exaScratch.src_format = format;

    hwBlitReset();

    /* TODO: dx and dy carry the copy direction.  Using them may allow
       the driver to skip working that out again. */

    /* Latch the description of the source pixmap */
    exaScratch.src_width = pxSrc->drawable.width;
    exaScratch.src_height = pxSrc->drawable.height;
    exaScratch.src_pitch = msm_pixmap_get_pitch(pxSrc);

    if (!msm_pixmap_in_gem(pxSrc)) {
        exaScratch.src_flags = MSM_BLIT_FB;
        exaScratch.src_priv = msm_pixmap_offset(pxSrc);
    }
    else {
        exaScratch.src_flags = MSM_BLIT_GEM;
        exaScratch.src_priv = (unsigned long) msm_get_pixmap_bo(pxSrc);
    }

    return TRUE;
}


/* Issue one blit, choosing between the software and hardware paths.
 *
 * The software path is taken when any of these holds:
 *   1) SWBlit was requested (pMsm->useSWBlit),
 *   2) the w x h area is at or below a fixed threshold, or
 *   3) isCopyMDPCompatible() rejects the blit (the original note here
 *      lists overlapping source and destination as the third case).
 * Pending hardware blits are flushed before a software blit runs.
 */

static inline void
HWOrSWBlit(MSMPtr pMsm, int w, int h,
	   MSMBlitSurface *src, MSMBlitSurface *dst,
	   MSMBlitRect *srcRect, MSMBlitRect *dstRect, int bpp)
{
    /* Largest area, in pixels, that is still sent to software */
    enum { SW_BLIT_MAX_AREA = 1500 };

    MSMBlitRec request;
    int useSoftware;

    request.src = src;
    request.dst = dst;
    request.srcRect = srcRect;
    request.dstRect = dstRect;

    useSoftware = pMsm->useSWBlit
        || (h * w <= SW_BLIT_MAX_AREA)
        || !isCopyMDPCompatible(&request, bpp);

    if (!useSoftware) {
        hwBlit(pMsm, &request, 0);
        return;
    }

    hwBlitFlush(pMsm);
    swBlit(pMsm, &request, bpp, !(pMsm->NoSigBlock));
}

/* EXA Copy hook: copy a w x h rectangle from (srcX,srcY) in the source
 * latched by MSMPrepareCopy() to (dstX,dstY) in pxDst.
 */
static void
MSMDoCopy(PixmapPtr pxDst, int srcX, int srcY, int dstX, int dstY,
	  int w, int h)
{
    MSMPtr pMsm = MSMPTR_FROM_PIXMAP(pxDst);
    int bpp = pxDst->drawable.bitsPerPixel;
    MSMBlitSurface from, to;
    MSMBlitRect fromRect, toRect;

    /* Both rectangles have the same size; only the origin differs */
    fromRect.x = srcX;
    fromRect.y = srcY;
    fromRect.w = w;
    fromRect.h = h;

    toRect.x = dstX;
    toRect.y = dstY;
    toRect.w = w;
    toRect.h = h;

    /* Source surface: everything comes from the scratch state */
    from.width = exaScratch.src_width;
    from.height = exaScratch.src_height;
    from.format = exaScratch.src_format;
    from.priv[0] = exaScratch.src_priv;
    from.flags = exaScratch.src_flags;
    from.pitch = exaScratch.src_pitch;

    /* Destination surface: same format as the source, since
     * MSMPrepareCopy() only accepts equal depths */
    to.width = pxDst->drawable.width;
    to.height = pxDst->drawable.height;
    to.format = exaScratch.src_format;
    to.pitch = msm_pixmap_get_pitch(pxDst);

    if (!msm_pixmap_in_gem(pxDst)) {
        to.flags = MSM_BLIT_FB;
        to.priv[0] = msm_pixmap_offset(pxDst);
    }
    else {
        to.flags = MSM_BLIT_GEM;
        to.priv[0] = (unsigned long) msm_get_pixmap_bo(pxDst);
    }

    HWOrSWBlit(pMsm, w, h, &from, &to, &fromRect, &toRect, bpp);
}

/* Decide whether the transform attached to pPict can be accelerated.
 *
 * In principle the hardware could translate, rotate in 90 degree steps
 * and scale (MDP supports a scaling range of 0.25X to 4.0X).  For now
 * only a pure translation is accepted.
 *
 * The caller must make sure pPict->transform is not NULL.
 */

static Bool
MSMCheckTransform(PicturePtr pPict)
{
    PictTransformPtr xform = pPict->transform;

    /* The upper-left 2x2 part must be the identity ... */
    if (xform->matrix[0][0] != FIXED(1) || xform->matrix[0][1] != FIXED(0))
        return FALSE;

    if (xform->matrix[1][0] != FIXED(0) || xform->matrix[1][1] != FIXED(1))
        return FALSE;

    /* ... and the bottom row must be (0, 0, 1).  matrix[0][2] and
     * matrix[1][2] are deliberately not examined, which is what lets
     * any translation through. */
    if (xform->matrix[2][0] != FIXED(0) || xform->matrix[2][1] != FIXED(0))
        return FALSE;

    if (xform->matrix[2][2] != FIXED(1))
        return FALSE;

    /* FIXME: Grok stretches and rotates too */
    return TRUE;
}

/* Apply transform t to the integer point (*x, *y) in place.
 * A NULL transform leaves the point untouched.
 */
static void
MSMTransformPoint(PictTransform * t, int *x, int *y)
{
    PictVector point;

    if (t != NULL) {
        point.vector[0] = FIXED(*x);
        point.vector[1] = FIXED(*y);
        point.vector[2] = xFixed1;

        /* PictureTransformPoint goes through pixman, which does the
         * math in fixed point.  That should be faster than floating
         * point emulation, even with the extra function calls. */
        PictureTransformPoint(t, &point);

        *x = INT(point.vector[0]);
        *y = INT(point.vector[1]);
    }
}

/* EXA CheckComposite hook: decide whether a composite request can be
 * accelerated.
 *
 * Accepted: PictOpSrc, no mask, source and destination in the same
 * supported format, no destination transform, and a source transform
 * that MSMCheckTransform() approves (or none at all).
 *
 * Side effects: exaScratch.src_format / dst_format are written as soon
 * as the op and mask checks pass; exaScratch.transform is written only
 * when the request is accepted.
 */
static Bool
MSMCheckComposite(int op, PicturePtr pSrc, PicturePtr pMsk, PicturePtr pDst)
{
    /* Only plain source copies without a mask are handled.
     * (PictOpDst could technically be supported as well.) */
    if (op != PictOpSrc || pMsk != NULL)
        return FALSE;

    exaScratch.src_format = msm_lookup_format(pSrc);
    exaScratch.dst_format = msm_lookup_format(pDst);

    /* Bail out on a format that is not in the lookup table */
    if (exaScratch.src_format == MDP_IMGTYPE_LIMIT)
        return FALSE;

    if (exaScratch.dst_format == MDP_IMGTYPE_LIMIT)
        return FALSE;

    /* With DRM in KMEM, the MDP can no longer be used for colour
     * conversion blits, so both sides must match */
    if (exaScratch.dst_format != exaScratch.src_format)
        return FALSE;

    /* Destination transforms are never accelerated */
    if (pDst->transform)
        return FALSE;

    /* A source transform is fine only if it is one we understand */
    if (pSrc->transform) {
        if (!MSMCheckTransform(pSrc))
            return FALSE;
    }

    exaScratch.transform = pSrc->transform;

    /* TODO: Exit if a filter is set (is this only needed with scaling?). */

    return TRUE;
}

/* EXA PrepareComposite hook: latch the source surface description and
 * the repeat state for the MSMDoComposite() calls that follow.
 *
 * op, pMsk, pDst and pxMsk are not examined here; the operation and
 * formats were already vetted by MSMCheckComposite().
 *
 * Returns FALSE when the source picture repeats but
 * pMsm->FastCompositeRepeat is disabled, TRUE otherwise.
 */
static Bool
MSMPrepareComposite(int op, PicturePtr pSrc, PicturePtr pMsk,
		    PicturePtr pDst, PixmapPtr pxSrc, PixmapPtr pxMsk,
		    PixmapPtr pxDst)
{
    MSMPtr pMsm = MSMPTR_FROM_PIXMAP(pxDst);

    /* Repeating sources are only handled when explicitly enabled */
    if (pSrc->repeat && !(pMsm->FastCompositeRepeat))
	return FALSE;

    hwBlitReset();

    /* Remember the surface information */

    exaScratch.src_width = pxSrc->drawable.width;
    exaScratch.src_height = pxSrc->drawable.height;
    exaScratch.src_pitch = msm_pixmap_get_pitch(pxSrc);

    if (msm_pixmap_in_gem(pxSrc)) {
	exaScratch.src_flags = MSM_BLIT_GEM;
	exaScratch.src_priv = (unsigned long) msm_get_pixmap_bo(pxSrc);
    }
    else {
	exaScratch.src_flags = MSM_BLIT_FB;
	exaScratch.src_priv = msm_pixmap_offset(pxSrc);
    }

    /* Record whether this operation repeats.  exaScratch is shared by
     * every composite, so the bit has to be cleared as well as set:
     * merely OR-ing it in would leave it stuck on after the first
     * repeating source and make every later composite tile too. */
    if (pSrc->repeat)
	exaScratch.flags |= EXA_SCRATCH_REPEAT;
    else
	exaScratch.flags &= ~EXA_SCRATCH_REPEAT;

    exaScratch.repeatType = pSrc->repeatType;

    return TRUE;
}

/* EXA Composite hook: copy a width x height area from the source
 * latched by MSMPrepareComposite() to (dstX,dstY) in pxDst.
 *
 * The source point is first run through exaScratch.transform.  When
 * EXA_SCRATCH_REPEAT is set the source is tiled, left to right and top
 * to bottom, until the destination rectangle is covered; otherwise a
 * single blit is issued.
 *
 * maskX and maskY are ignored (masks are rejected by
 * MSMCheckComposite()).
 */
static void
MSMDoComposite(PixmapPtr pxDst, int srcX, int srcY, int maskX,
	       int maskY, int dstX, int dstY, int width, int height)
{
    MSMPtr pMsm = MSMPTR_FROM_PIXMAP(pxDst);
    int bpp = pxDst->drawable.bitsPerPixel;
    int dx = dstX;
    int dy = dstY;
    int w, h;

    /* Transform the source point */
    MSMTransformPoint(exaScratch.transform, &srcX, &srcY);

    /* FIXME:  Sometimes srcX and srcY are negative, and
     * there is no accompanying transform.  This is the best
     * we can do to avoid bugs, but this still fails the
     * cairo paint-repeat test.  Note that the destination origin is
     * not moved to match the clipped source origin.
     */

    if (srcX < 0) {
	width += srcX;
	srcX = 0;
    }

    /* A negative source Y shrinks the height, not the width */
    if (srcY < 0) {
	height += srcY;
	srcY = 0;
    }

    /* Nothing left to draw */
    if (width <= 0 || height <= 0)
	return;

    /* One tile is the source size, limited to the requested area */
    w = (exaScratch.src_width < width) ? exaScratch.src_width : width;
    h = (exaScratch.src_height < height) ? exaScratch.src_height : height;

    /* A zero-sized source would never advance dx/dy in the repeat loop
     * below, so stop here instead of spinning forever. */
    if (w <= 0 || h <= 0)
	return;

    for (;;) {
	/* Clip the operation to make sure it stays within bounds */

	int dw = (dx + w > dstX + width) ? dstX + width - dx : w;
	int dh = (dy + h > dstY + height) ? dstY + height - dy : h;

	MSMBlitSurface srcSurface, dstSurface;
	MSMBlitRect srcRect, dstRect;

	srcSurface.width = exaScratch.src_width;
	srcSurface.height = exaScratch.src_height;
	srcSurface.format = exaScratch.src_format;
	srcSurface.priv[0] = exaScratch.src_priv;
	srcSurface.flags = exaScratch.src_flags;
	srcSurface.pitch = exaScratch.src_pitch;

	dstSurface.width = pxDst->drawable.width;
	dstSurface.height = pxDst->drawable.height;
	dstSurface.format = exaScratch.dst_format;
	dstSurface.pitch = msm_pixmap_get_pitch(pxDst);

	if (msm_pixmap_in_gem(pxDst)) {
	    dstSurface.flags = MSM_BLIT_GEM;
	    dstSurface.priv[0] = (unsigned long) msm_get_pixmap_bo(pxDst);
	}
	else {
	    dstSurface.flags = MSM_BLIT_FB;
	    dstSurface.priv[0] = msm_pixmap_offset(pxDst);
	}

	srcRect.x = srcX;
	srcRect.y = srcY;
	srcRect.w = dw;
	srcRect.h = dh;

	dstRect.x = dx;
	dstRect.y = dy;
	dstRect.w = dw;
	dstRect.h = dh;

	/* All the operations are straight copies, so use software or
	 * hardware rendering, depending on which is most efficient.
	 * NOTE(review): the size heuristic is fed the unclipped tile
	 * size (w, h), not the clipped (dw, dh) - confirm that this is
	 * intended. */

	HWOrSWBlit(pMsm, w, h, &srcSurface, &dstSurface,
		   &srcRect, &dstRect, bpp);

	/* If the repeat flag isn't set, then we are done */

	if (!(exaScratch.flags & EXA_SCRATCH_REPEAT))
	    break;

	/* Step to the next tile in this row ... */
	dx += dw;

	/* ... or wrap to the start of the next row */
	if (dx >= dstX + width) {
	    dx = dstX;
	    dy += dh;
	}

	if (dy >= dstY + height)
	    break;
    }
}

/* Shared DoneCopy / DoneComposite hook: flush the blits queued for the
 * screen that owns ptr. */
static void
MSMDone(PixmapPtr ptr)
{
    hwBlitFlush(MSMPTR_FROM_PIXMAP(ptr));
}

/* EXA UploadToScreen hook: copy a w x h block of pixels from the buffer
 * at src (rows srcPitch bytes apart) into pxDst at (dstX,dstY).
 *
 * The destination address is computed relative to pMsm->fbmem.
 * Returns FALSE for depths other than 16, 24 or 32 bits per pixel.
 */

Bool
MSMUploadToScreen(PixmapPtr pxDst,
                  int dstX, int dstY, int w, int h,
                  char *src, int srcPitch)
{
    int bpp = pxDst->drawable.bitsPerPixel;
    MSMPtr pMsm;
    int pitch;
    int base;
    char *dst;

    if (bpp != 16 && bpp != 24 && bpp != 32)
        return FALSE;

    pMsm = MSMPTR_FROM_PIXMAP(pxDst);
    pitch = msm_pixmap_get_pitch(pxDst);
    base = msm_pixmap_offset(pxDst);

    /* First destination byte: pixmap base, then row, then column */
    dst = (char *) (pMsm->fbmem) + base + dstY * pitch + dstX * (bpp / 8);

    swBlit_NoOverlap((unsigned char* __restrict__) dst,
                     (unsigned char* __restrict__) src,
                     w, h, pitch, srcPitch,
                     bpp, !(pMsm->NoSigBlock));

    return TRUE;
}

/* EXA DownloadFromScreen hook: copy a w x h block of pixels from pxSrc
 * at (srcX,srcY) into the buffer at dst (rows dstPitch bytes apart).
 *
 * The source address is computed relative to pMsm->fbmem.
 * Returns FALSE for depths other than 16, 24 or 32 bits per pixel.
 */

Bool
MSMDownloadFromScreen(PixmapPtr pxSrc,
                      int srcX, int srcY, int w, int h,
                      char *dst, int dstPitch)
{
    int bpp = pxSrc->drawable.bitsPerPixel;
    MSMPtr pMsm;
    int pitch;
    int base;
    char *src;

    if (bpp != 16 && bpp != 24 && bpp != 32)
        return FALSE;

    pMsm = MSMPTR_FROM_PIXMAP(pxSrc);
    pitch = msm_pixmap_get_pitch(pxSrc);
    base = msm_pixmap_offset(pxSrc);

    /* First source byte: pixmap base, then row, then column */
    src = (char *) (pMsm->fbmem) + base + srcY * pitch + srcX * (bpp / 8);

    swBlit_NoOverlap((unsigned char* __restrict__) dst,
                     (unsigned char* __restrict__) src,
                     w, h, dstPitch, pitch,
                     bpp, !(pMsm->NoSigBlock));

    return TRUE;
}

/* EXA WaitMarker hook: intentionally empty, nothing is waited for. */
static void
MSMWaitMarker(ScreenPtr pScreen, int marker)
{
    (void) pScreen;
    (void) marker;
}

/* EXA PixmapIsOffscreen hook.
 *
 * Returns TRUE for the screen pixmap, for a pixmap whose buffer object
 * is of memory type MSM_DRM_MEMTYPE_EBI, and for a pixmap with any
 * other buffer object as long as devPrivate.ptr is NULL.
 * Returns FALSE when there is no driver private or no buffer object.
 */
static Bool
MSMPixmapIsOffscreen(PixmapPtr pPixmap)
{
    ScreenPtr pScreen = pPixmap->drawable.pScreen;
    struct msm_pixmap_priv *priv;

    if (pPixmap == pScreen->GetScreenPixmap(pScreen))
        return TRUE;

    priv = exaGetPixmapDriverPrivate(pPixmap);

    if (!priv || !priv->bo)
        return FALSE;

    if (msm_drm_bo_get_memtype(priv->bo) == MSM_DRM_MEMTYPE_EBI)
        return TRUE;

    /* For other memory types the answer depends on whether
     * devPrivate.ptr is currently set */
    if (pPixmap->devPrivate.ptr)
        return FALSE;

    return TRUE;
}

/* EXA PrepareAccess hook: make the pixmap's buffer object CPU
 * accessible.
 *
 * Returns FALSE when the pixmap has no driver private or when mapping
 * the buffer object fails.  A pixmap without a buffer object needs no
 * preparation and yields TRUE.
 *
 * For 16bpp pixmaps the pitch (devKind) is temporarily replaced by one
 * based on the width rounded up to a multiple of 32 pixels; the old
 * value is kept in priv->SavedPitch for MSMFinishAccess().
 */
static Bool
MSMPrepareAccess(PixmapPtr pPixmap, int index)
{
    struct msm_pixmap_priv *priv = exaGetPixmapDriverPrivate(pPixmap);

    if (!priv)
        return FALSE;

    /* Nothing to map */
    if (!priv->bo)
        return TRUE;

    /* A non-zero result from the map call is treated as failure */
    if (msm_drm_bo_map(priv->bo))
        return FALSE;

    if (!pPixmap->devPrivate.ptr)
        pPixmap->devPrivate.ptr = (void *) priv->bo->virt;

    /* Technically this should be done for all depths, but that seems
       to freak out the mouse cursor, so only 16bpp is adjusted */

    if (pPixmap->drawable.bitsPerPixel == 16) {
        int alignedWidth = (pPixmap->drawable.width + 31) & ~31;

        priv->SavedPitch = pPixmap->devKind;
        pPixmap->devKind =
            alignedWidth * (pPixmap->drawable.bitsPerPixel >> 3);
    }

    return TRUE;
}

/* EXA FinishAccess hook: restore the pitch that MSMPrepareAccess()
 * saved, if any, and clear the saved value. */
static void
MSMFinishAccess(PixmapPtr pPixmap, int index)
{
    struct msm_pixmap_priv *priv = exaGetPixmapDriverPrivate(pPixmap);

    /* No private data or no saved pitch: nothing to undo */
    if (!priv || !priv->SavedPitch)
        return;

    pPixmap->devKind = priv->SavedPitch;
    priv->SavedPitch = 0;
}

/* EXA CreatePixmap hook: allocate the driver-private record for a new
 * pixmap and, when size is non-zero, a buffer object of that size with
 * memory type pMsm->pixmapMemtype.
 *
 * Returns the private record, or NULL if either allocation fails.
 * align is not used.
 */
static void *
MSMCreatePixmap(ScreenPtr pScreen, int size, int align)
{
    MSMPtr pMsm = MSMPTR(xf86Screens[pScreen->myNum]);
    struct msm_pixmap_priv *priv;

    priv = xcalloc(1, sizeof(*priv));
    if (!priv)
        return NULL;

    /* A zero-sized pixmap gets a private record but no buffer object */
    if (size) {
        priv->bo = msm_drm_bo_create_memtype(pMsm->drmFD, size,
                                             pMsm->pixmapMemtype);

        if (!priv->bo) {
            xfree(priv);
            return NULL;
        }
    }

    return priv;
}

/* EXA DestroyPixmap hook: free the buffer object (if any) and the
 * driver-private record created by MSMCreatePixmap().  A NULL dpriv is
 * ignored. */
static void
MSMDestroyPixmap(ScreenPtr pScreen, void *dpriv)
{
    struct msm_pixmap_priv *priv = dpriv;

    if (dpriv != NULL) {
        if (priv->bo)
            msm_drm_bo_free(priv->bo);

        xfree(dpriv);
    }
}

/* Fill in the EXA driver record for this screen and register it.
 *
 * The record is allocated on first use and cached in pMsm->pExa.
 * Which hooks are installed depends on the FastFill, FastComposite,
 * FastAppFBMemCopy and (when built with USEDRI2) useDRI2 settings.
 *
 * Returns FALSE when the record cannot be allocated, otherwise the
 * result of exaDriverInit().
 */
Bool
MSMSetupExa(ScreenPtr pScreen)
{
    MSMPtr pMsm = MSMPTR(xf86Screens[pScreen->myNum]);
    ExaDriverPtr pExa;

    if (!pMsm->pExa) {
        pMsm->pExa = exaDriverAlloc();

        if (!pMsm->pExa)
            return FALSE;
    }

    pExa = pMsm->pExa;

    /* EXA major/minor version currently supported */
    pExa->exa_major = 2;
    pExa->exa_minor = 2;

    pExa->flags = EXA_OFFSCREEN_PIXMAPS;

    /* Memory layout: offscreen memory starts after yres lines of
       line_length bytes each */
    pExa->memoryBase = pMsm->fbmem;
    pExa->memorySize = pMsm->fixed_info.smem_len;
    pExa->offScreenBase =
        (pMsm->fixed_info.line_length * pMsm->mode_info.yres);

    pExa->maxX = pMsm->mode_info.xres_virtual;
    pExa->maxY = pMsm->mode_info.yres_virtual;

    /* Align pixmap offsets along page boundaries */
    pExa->pixmapOffsetAlign = 4096;

    /* Align pixmap pitches to the maximum alignment needed by the
       GPU - this ensures that we have enough room, and the pitches are
       adjusted down to the depth later */
    pExa->pixmapPitchAlign = 128;

    /* Solid fill handlers.  PrepareSolid is always installed; it
       rejects every request itself when FastFill is off. */
    pExa->PrepareSolid = MSMPrepareSolid;

    if (pMsm->FastFill) {
        /* The performance of the solid fill functions may be tested with:
           x11perf -rectX    (where "X" is 10, 100 or 500)
           x11perf -trapX    (where "X" is 10, 100 or 300)
           x11perf -fcircleX (where "X" is 1, 10, 100 or 500)
           (there are many more x11perf tests that do solid fill)
        */
        pExa->Solid = MSMSolid;
        pExa->DoneSolid = MSMDoneSolid;
    }

    /* Accelerated copy handlers.
       The performance of the copy functions may be tested with:
       x11perf -scrollX
       where "X" is 10, 100 or 500.
    */
    pExa->PrepareCopy = MSMPrepareCopy;
    pExa->Copy = MSMDoCopy;
    pExa->DoneCopy = MSMDone;
    pExa->WaitMarker = MSMWaitMarker;

    /* Accelerated compositing handlers */
    if (pMsm->FastComposite) {
        pExa->CheckComposite = MSMCheckComposite;
        pExa->PrepareComposite = MSMPrepareComposite;
        pExa->Composite = MSMDoComposite;
        pExa->DoneComposite = MSMDone;
    }

    /* UploadToScreen and DownloadFromScreen can copy rectangular
       regions much faster than memcpy() because they use Neon
       optimizations.
       FIXME: Unfortunately, this code currently causes
       diagonal artifacts on the screen for some reason.
    */
    if (pMsm->FastAppFBMemCopy) {
        /* The performance of these functions may be tested with:
           x11perf -copypixwinX   (for UploadToScreen())
           x11perf -copywinpixX   (for DownloadFromScreen())
           where "X" is 10, 100 or 500.
        */
        pExa->DownloadFromScreen = MSMDownloadFromScreen;
        pExa->UploadToScreen = MSMUploadToScreen;
    }

#if USEDRI2
    /* With DRI2 the driver manages pixmap storage itself */
    if (pMsm->useDRI2) {
        pExa->flags |= EXA_HANDLES_PIXMAPS;

        pExa->PixmapIsOffscreen = MSMPixmapIsOffscreen;
        pExa->CreatePixmap = MSMCreatePixmap;
        pExa->DestroyPixmap = MSMDestroyPixmap;
        pExa->PrepareAccess = MSMPrepareAccess;
        pExa->FinishAccess = MSMFinishAccess;
    }
#endif

    return exaDriverInit(pScreen, pExa);
}
