/**
 * Cleversafe open-source code header - Version 1.1 - December 1, 2006
 *
 * Cleversafe Dispersed Storage(TM) is software for secure, private and
 * reliable storage of the world's data using information dispersal.
 *
 * Copyright (C) 2005-2007 Cleversafe, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
 * USA.
 *
 * Contact Information: Cleversafe, 10 W. 35th Street, 16th Floor #84,
 * Chicago IL 60616
 * email licensing@cleversafe.org
 *
 * Author: Greg Dhuse <gdhuse@cleversafe.com>
 *
 */

#include "dsd.h"

/*
 * Size in bytes of the unit in which request positions and lengths
 * (req->sector, req->nr_sectors) are converted below; the device's own
 * sector size (dev->sector_size) may be larger.
 */
#define KERNEL_SECTOR_SIZE 512

/*
 * Module-wide state.  None of these is protected by a lock in this file.
 */
int dsd_major = 0;                        /* Major device number; dsd_init() stores register_blkdev()'s result here */
int dsd_next_device;                      /* Number given to the next device created by dsd_add_device(); reset in dsd_init() */
struct dsd_dev* dsd_devices;              /* Head of the singly linked device list (see dsd_alloc_dev/dsd_free_dev) */

/* Device list management: allocate+link / unlink+free a struct dsd_dev */
static struct dsd_dev* dsd_alloc_dev( void );
static void dsd_free_dev( struct dsd_dev* dev );

/*
 * Device operations.  dsd_request and dsd_request_prep are installed on
 * the request queue by dsd_add_device(); open/release/ioctl are exposed
 * through dsd_ops.
 */
static void dsd_request( request_queue_t* q );
static int dsd_request_prep( request_queue_t* q, struct request* req );
static int dsd_open( struct inode* inode, struct file* filp );
static int dsd_release( struct inode* inode, struct file* filp );
static int dsd_ioctl( struct inode* inode, 
                      struct file* filp, 
                      unsigned int ioctl, 
                      unsigned long arg );

/* Device thread: one per device, started by dsd_add_device() */
static int dsd_device_thread( void* data );

/**
 * Kernel device operations structure.
 *
 * Assigned to gendisk->fops for every device in dsd_add_device().  Only
 * open, release and ioctl are provided here; sector reads and writes
 * arrive through the request queue and are handled by dsd_request().
 */
static struct block_device_operations dsd_ops = 
{
   .owner         = THIS_MODULE,
   .open          = dsd_open,       /* bumps dev->use_count */
   .release       = dsd_release,    /* drops dev->use_count */
   .ioctl         = dsd_ioctl,      /* currently always returns -ENOTTY */
};

/**
 * Request preparation hook, run before a request reaches dsd_request().
 *
 * Decides, in order:
 *   - BLKPREP_KILL  when the device has a negative error code set;
 *   - BLKPREP_OK    for non-filesystem requests (nothing to prepare);
 *   - BLKPREP_OK    for filesystem requests that obtained a tag;
 *   - BLKPREP_DEFER otherwise (blk_queue_start_tag() returned non-zero),
 *                   so the request is retried later.
 */
static int dsd_request_prep( request_queue_t* q, struct request* req )
{
   struct dsd_dev* device = q->queuedata;
   int verdict = BLKPREP_DEFER;     /* Default: try this request again later */

   if( atomic_read( &device->error ) < 0 )
   {
      /* Device is in an error state */
      verdict = BLKPREP_KILL;
   }
   else if(unlikely( !blk_fs_request( req ) ))
   {
      /* Non-filesystem requests need no preparation */
      verdict = BLKPREP_OK;
   }
   else if( blk_queue_start_tag( q, req ) == 0 )
   {
      /* Filesystem request successfully tagged */
      verdict = BLKPREP_OK;
   }

   return verdict;
}

/**
 * Request function: translate queued block requests into network messages.
 *
 * Every filesystem request returned by elv_next_request() has already been
 * tagged by dsd_request_prep().  A write is sent as DSD_MSG_WRITE_SECTORS
 * (header plus a copy of the data), a read as DSD_MSG_READ_SECTORS; the
 * request itself stays outstanding until dsd_device_thread() receives the
 * matching response and completes it by tag.
 *
 * If the message cannot be allocated or dsd_send_atomic() reports an error
 * (negative status), the request is completed here with failure so that it
 * does not stay tagged and pending forever.
 *
 * Non-filesystem requests are ended immediately with failure.
 *
 * Context: called by the block layer with the queue lock (dev->lock) held,
 * hence GFP_ATOMIC allocations and the lock-free completion calls below.
 *
 * NOTE(review): the message buffer is not freed here after
 * dsd_send_atomic(); presumably that function takes ownership of it, also
 * on failure -- confirm against its implementation.
 *
 * @param q Request queue whose queuedata points at the owning dsd_dev
 */
static void dsd_request( request_queue_t* q )
{
   struct request* req;
   struct dsd_dev* dev = q->queuedata;

   /* Process requests */
   while( (req = elv_next_request( q )) != NULL )
   {
      int status;
      uint8_t num_sectors;
      uint64_t first_sector;
      char* buffer;
      uint32_t buffer_size;

      if(unlikely( !blk_fs_request( req ) ))
      {
         /* Ignore non-filesystem request */
         end_request( req, 0 );
         continue;
      }

      buffer      = req->buffer;
      buffer_size = req->nr_sectors * KERNEL_SECTOR_SIZE;

      /*
       * Convert from 512-byte kernel sectors to device sectors.  Widen
       * before multiplying: sector_t may be 32 bits wide and the byte
       * offset would otherwise wrap.  do_div() is the 64-bit division.
       */
      first_sector = (uint64_t)req->sector * KERNEL_SECTOR_SIZE;
      do_div( first_sector, dev->sector_size );
      num_sectors  = req->nr_sectors * KERNEL_SECTOR_SIZE / dev->sector_size;

      if( rq_data_dir( req ) == WRITE )
      {
         /* Process block write: header followed by the sector data */
         int msg_size = sizeof( struct dsd_msg_write_sectors ) 
            + buffer_size; 
         struct dsd_msg_write_sectors* msg 
            = kmalloc( msg_size, GFP_ATOMIC );

         #ifdef DSD_DEBUG
         printk( KERN_ALERT DSD_TAG "request: write+send: "
            "fs=%lld,ns=%u,tg=%d\n", first_sector, num_sectors, req->tag );
         #endif

         if(likely( msg ))
         {
            msg->request_tag  = htonl( req->tag );
            msg->first_sector = htonll( first_sector );
            msg->num_sectors  = num_sectors;
            memcpy( msg->data, buffer, buffer_size );

            status = dsd_send_atomic( dev->sock, 
                                      DSD_MSG_WRITE_SECTORS, 
                                     (uint8_t*)msg, 
                                      msg_size,
                                      &dev->pending_work );
         }
         else
         {
            printk( KERN_ALERT DSD_TAG "Out of memory\n" );
            status = -ENOMEM;
         }
      }
      else
      {
         /* Process block read: header only, data comes back in the response */
         int msg_size = sizeof( struct dsd_msg_read_sectors ); 
         struct dsd_msg_read_sectors* msg 
            = kmalloc( msg_size, GFP_ATOMIC );

         #ifdef DSD_DEBUG
         printk( KERN_ALERT DSD_TAG "request: read+send: "
            "fs=%lld,ns=%u,tg=%d\n", first_sector, num_sectors, req->tag );
         #endif

         if(likely( msg ))
         {
            msg->request_tag  = htonl( req->tag );
            msg->first_sector = htonll( first_sector );
            msg->num_sectors  = num_sectors;

            status = dsd_send_atomic( dev->sock, 
                                      DSD_MSG_READ_SECTORS, 
                                     (uint8_t*)msg, 
                                      msg_size,
                                      &dev->pending_work );
         }
         else
         {
            printk( KERN_ALERT DSD_TAG "Out of memory\n" );
            status = -ENOMEM;
         }
      }

      if(unlikely( status < 0 ))
      {
         /*
          * Nothing was (successfully) sent, so no response will ever
          * complete this request.  Fail it now, using the same sequence
          * the device thread uses; the queue lock is already held here.
          */
         printk( KERN_ALERT DSD_TAG "Request failed: tg=%d (%d)\n", 
            req->tag, status );

         if( !end_that_request_first( req, 0, req->nr_sectors ) )
         {
            blk_queue_end_tag( q, req );
            end_that_request_last( req, 0 );
         }
      }
   }
   
   /** 
    * Flag the request queue to be re-processed.  Any deferred 
    * requests will be retried
    */
   blk_plug_device( q );
}

/**
 * Open handler (device opened or mounted).
 *
 * Looks up the dsd_dev behind the inode's gendisk, remembers it in the
 * file's private_data and counts the new user.  Always returns 0.
 */
static int dsd_open( struct inode* inode, struct file* filp )
{
   struct dsd_dev* device = inode->i_bdev->bd_disk->private_data;

   atomic_inc( &device->use_count );
   filp->private_data = device;

   return 0;
}

/**
 * Release handler (device closed or unmounted).
 *
 * Drops the user count taken in dsd_open(); dsd_remove_device() waits for
 * this count to reach zero.  Always returns 0.
 */
static int dsd_release( struct inode* inode, struct file* filp )
{
   struct dsd_dev* device = inode->i_bdev->bd_disk->private_data;

   atomic_dec( &device->use_count );

   return 0;
}

/**
 * Add a new dsd device (eg. /dev/dsd9) to the system
 *
 * Allocates and links a dsd_dev, creates its tagged request queue and
 * gendisk, registers the disk and starts the per-device receive thread.
 * On any failure everything set up so far is torn down again; the socket
 * is NOT released on failure and stays owned by the caller.
 *
 * @param sectors Number of sectors
 * @param sector_size Sector size in bytes; must be a non-zero multiple of
 *                    KERNEL_SECTOR_SIZE
 * @param sk Initialized and connected socket
 * @param dev_ptr Optional - If non-null, will point to new device upon success
 * @return 0 on success; -EINVAL for an unusable sector size, -ENOMEM when
 *         the device, queue or tag map cannot be allocated, -EIO when the
 *         gendisk cannot be allocated, or the error reported by
 *         kthread_run()
 */
int dsd_add_device( uint64_t sectors, 
                    uint32_t sector_size, 
                    struct socket* sk, 
                    struct dsd_dev** dev_ptr )
{
   int status;
   int device_num;
   struct dsd_dev* dev;

   /*
    * The request path divides by sector_size and the queue limits are
    * derived from sector_size / KERNEL_SECTOR_SIZE, so zero or a size that
    * is not a whole number of kernel sectors cannot work.
    */
   if(unlikely( sector_size == 0 || (sector_size % KERNEL_SECTOR_SIZE) != 0 ))
   {
      printk( KERN_ALERT DSD_TAG "Invalid sector size: %u\n", sector_size );
      return -EINVAL;
   }

   /* Device numbering */
   device_num = dsd_next_device;
   dsd_next_device++;

   printk( KERN_ALERT DSD_TAG "Creating device: /dev/" 
      DSD_DEVICE_PREFIX "%d (ns=%llu,sz=%u)\n", 
      device_num, sectors, sector_size );
   
   /* Allocate a new device */
   dev = dsd_alloc_dev();
   if(unlikely( !dev ))
   {
      printk( KERN_ALERT DSD_TAG "Out of memory\n" );
      return -ENOMEM;
   }
   dev->device_num   = device_num;
   dev->sectors      = sectors;
   dev->sector_size  = sector_size;
   dev->sock         = sk;          /* Already connected */
   atomic_set( &dev->use_count, 0 );
   atomic_set( &dev->error, 0 );
   atomic_set( &dev->pending_work, 0 );
   
   /* Setup request queue */
   spin_lock_init( &dev->lock );
   dev->queue = blk_init_queue( dsd_request, &dev->lock );
   if(unlikely( dev->queue == NULL ))
   {
      printk( KERN_ALERT DSD_TAG "Out of memory\n" );
      status = -ENOMEM;
      goto fail_free_dev;
   }
   /* BUG: There appears to be an issue with sector sizes > 4k */
   blk_queue_hardsect_size( dev->queue, (unsigned short)sector_size );
   /* Note: limit to 1 sector per request for now */
   blk_queue_max_sectors( dev->queue, sector_size/KERNEL_SECTOR_SIZE ); 
   dev->queue->queuedata = dev;

   /* Tagged command queueing */
   if(unlikely( blk_queue_init_tags( dev->queue, 
         DSD_MAX_OUTSTANDING_REQUESTS, NULL ) ))
   {
      printk( KERN_ALERT DSD_TAG "Out of memory\n" );
      status = -ENOMEM;
      goto fail_cleanup_queue;
   }

   /* Request preparation */
   blk_queue_prep_rq( dev->queue, dsd_request_prep );
      
   /* Gendisk structure */
   dev->gd = alloc_disk( 1 );   /* 1 minor - no partitions */
   if(unlikely( !dev->gd ))
   {
      printk( KERN_NOTICE DSD_TAG "alloc_disk failure\n" );
      status = -EIO;
      goto fail_cleanup_queue;
   }
   dev->gd->major          = dsd_major;
   dev->gd->first_minor    = device_num;     
   dev->gd->fops           = &dsd_ops;
   dev->gd->queue          = dev->queue;
   dev->gd->private_data   = dev;
   snprintf( dev->gd->disk_name, sizeof( dev->gd->disk_name ), 
      DSD_DEVICE_PREFIX "%d", device_num );
   set_capacity( dev->gd, sectors*(sector_size/KERNEL_SECTOR_SIZE) );
   add_disk( dev->gd );

   /* Start device thread */
   dev->thread = kthread_run( dsd_device_thread, dev, dev->gd->disk_name );
   if(unlikely( IS_ERR( dev->thread ) ))
   {
      /* kthread_run() reports failure as an ERR_PTR, not only -ENOMEM */
      printk( KERN_ALERT DSD_TAG "Unable to start device thread\n" );
      status = PTR_ERR( dev->thread );
      goto fail_del_disk;
   }

   if( dev_ptr )
   {
      *dev_ptr = dev;
   }
   return 0;

   /* Unwind in reverse order of construction */
fail_del_disk:
   del_gendisk( dev->gd );
   put_disk( dev->gd );
fail_cleanup_queue:
   blk_cleanup_queue( dev->queue );
fail_free_dev:
   dsd_free_dev( dev );
   return status;
}

/**
 * Remove device (eg. /dev/dsd8) from the system
 *
 * Marks the device failed (so dsd_request_prep() kills new requests),
 * stops the device thread unless it is the caller, waits for pending send
 * work, invalidates outstanding tags, then WAITS until every open handle
 * has been released before tearing down the gendisk, the queue, the
 * socket and the device itself.
 *
 * Both waits are schedule() polling loops, so this function may block for
 * an unbounded time while the device is still open.
 *
 * @param dev Device to remove; freed on return and must not be used again
 * @return Always 0
 */
int dsd_remove_device( struct dsd_dev* dev )
{
   printk( KERN_ALERT DSD_TAG "Removing device: /dev/%s\n", 
      dev->gd ? dev->gd->disk_name : "?" );

   /* Device is in an error state */
   atomic_set( &dev->error, -EIO );

   /* Stop device thread (it calls this function itself on fatal errors) */
   if( dev->thread != current )
   {
      kthread_stop( dev->thread );
   }

   /* Wait for pending work */
   while( atomic_read( &dev->pending_work ) > 0 )
   {
      schedule();
   }

   /* Grab request queue lock, flush tagged requests */
   if(likely( dev->queue ))
   {
      /* spin_lock_irqsave() stores the saved flags in an unsigned long */
      unsigned long irq_flags;

      spin_lock_irqsave( &dev->lock, irq_flags );
      blk_queue_invalidate_tags( dev->queue );
      spin_unlock_irqrestore( &dev->lock, irq_flags );
   }

   /* Wait for all handles to be released */
   while( atomic_read( &dev->use_count ) > 0 ) 
   { 
      schedule(); 
   }

   /* Cleanup gendisk */      
   if(likely( dev->gd ))
   {
      del_gendisk( dev->gd );
      put_disk( dev->gd );
   }

   /* Destroy request queue */
   if(likely( dev->queue ))
   {
      blk_cleanup_queue( dev->queue );
   }

   /* Close socket, free device */
   sock_release( dev->sock ); 
   dsd_free_dev( dev );

   /** Remove last reference to module if this is the last disk */
   if( !dsd_devices )
   {
      /* FIXME: hack */
      while( module_refcount( THIS_MODULE ) > 0 )
      {
         module_put( THIS_MODULE );
      }
   }

   return 0;
}

/**
 * Handle ioctl calls
 *
 * No ioctl is implemented yet: the call is logged (HDIO_GETGEO is
 * recognised but still unwritten) and -ENOTTY is returned in every case.
 */
static int dsd_ioctl( struct inode* inode, 
                      struct file* filp,
                      unsigned int ioctl,
                      unsigned long arg )
{
   if( HDIO_GETGEO == ioctl )
   {
      /* Physical device geometry */
      printk( KERN_ALERT DSD_TAG "ioctl( HDIO_GETGEO )\n" );
      /* FIXME: WRITE ME */
   }
   else
   {
      printk( KERN_ALERT DSD_TAG "Unhandled ioctl(0x%08x)\n", ioctl );
   }

   return -ENOTTY;
}


/**
 * Device thread: Process incoming network events
 *
 * Loops on a blocking dsd_recv() and dispatches on the message type:
 *   - DSD_MSG_REMOVE_DEVICE:     acknowledge, then remove the device;
 *   - DSD_MSG_WRITE_SECTORS_RSP: complete the tagged write request;
 *   - DSD_MSG_READ_SECTORS_RSP:  copy the data into the tagged read
 *                                request and complete it;
 *   - anything else is ignored.
 *
 * A receive error, a response carrying an unknown tag, or a read response
 * whose length does not match the request is treated as fatal: the thread
 * removes its own device and exits with -EIO.
 *
 * The buffer returned by dsd_recv() is freed here on every path that is
 * reached after a successful receive.
 *
 * @param data The struct dsd_dev this thread serves
 * @return 0 when stopped through kthread_stop(); otherwise the thread
 *         leaves through do_exit() after removing the device
 */
static int dsd_device_thread( void* data )
{
   int status = 0;
   unsigned long irq_flags;   /* type required by spin_lock_irqsave() */
   struct dsd_dev* dev = (struct dsd_dev*)data;

   while( !kthread_should_stop() )
   {
      int msg_type;
      uint8_t* msg = NULL;

      /* Blocking recv */
      msg_type = dsd_recv( dev->sock, &msg );
      if(unlikely( msg_type < 0 ))
      {
         /*
          * NOTE(review): msg is not freed on this path because it is not
          * visible here whether dsd_recv() leaves a buffer behind on error.
          */
         printk( KERN_ALERT DSD_TAG "Device communication error: %s(%d)\n", 
            dev->gd->disk_name, msg_type );
         status = -EIO;
         goto remove_device;
      }

      switch( msg_type )
      {
         case DSD_MSG_REMOVE_DEVICE:  
         {
            /**
             * Remove device (eg. /dev/dsd4)
             */
            struct dsd_msg_remove_device_rsp* rsp_fields
               = kmalloc( sizeof(struct dsd_msg_remove_device_rsp), GFP_KERNEL );

            /* Acknowledge receipt, then cleanup */
            if(unlikely( !rsp_fields ))
            {
               status = -ENOMEM;
            }
            else
            {
               status = 0;
               rsp_fields->status = 0;

               /*
                * NOTE(review): rsp_fields is not freed here; presumably
                * dsd_send() takes ownership -- confirm.
                */
               dsd_send( dev->sock, 
                         DSD_MSG_REMOVE_DEVICE_RSP, 
                        (uint8_t*)rsp_fields, 
                         sizeof(struct dsd_msg_remove_device_rsp) );
            }   
            
            kfree( msg );
            goto remove_device;
         }

         case DSD_MSG_WRITE_SECTORS_RSP:
         {
            /**
             * Response to a write request
             */
            struct request* req = NULL;
            struct dsd_msg_write_sectors_rsp* response
               = (struct dsd_msg_write_sectors_rsp*)msg;

            response->request_tag = ntohl( response->request_tag );

            #ifdef DSD_DEBUG
            printk( KERN_ALERT DSD_TAG "write+recv: Got: st=%d,tg=%d\n", 
               response->status, response->request_tag );
            #endif

            /* Associate tagged command response */
            req = blk_queue_find_tag( dev->queue, response->request_tag );
            if( !req )
            {
               status = -EIO;
               kfree( msg );
               goto remove_device;
            }
            
            /* Complete request */
            #ifdef DSD_DEBUG
            printk( KERN_ALERT DSD_TAG "Write complete (%s)\n", 
               (0 == response->status) ? "success" : "failure" );
            #endif
            spin_lock_irqsave( &dev->lock, irq_flags );
            {
               int success = ( 0 == response->status );
               int end_status = end_that_request_first( req, success, req->nr_sectors );
               BUG_ON( end_status > 0 );

               blk_queue_end_tag( dev->queue, req );
               end_that_request_last( req, success );
            }
            spin_unlock_irqrestore( &dev->lock, irq_flags );
         }
         break;

         case DSD_MSG_READ_SECTORS_RSP:
         {
            /**
             * Response to a read request
             */
            struct request* req = NULL;
            struct dsd_msg_read_sectors_rsp* response
               = (struct dsd_msg_read_sectors_rsp*)msg;

            response->request_tag = ntohl( response->request_tag );
            response->bytes = ntohll( response->bytes );

            #ifdef DSD_DEBUG
            printk( KERN_ALERT DSD_TAG "read+recv: Got: b=%lld,tg=%d\n", 
               response->bytes, response->request_tag );
            #endif

            /* Associate tagged command response */
            req = blk_queue_find_tag( dev->queue, response->request_tag );
            if( !req )
            {
               status = -EIO;
               kfree( msg );
               goto remove_device;
            }

            /*
             * The length comes from the remote peer.  A mismatch means the
             * protocol is out of sync; fail the device instead of BUG()ing
             * the kernel or copying the wrong amount into the request.
             */
            if(unlikely( response->bytes != req->nr_sectors * KERNEL_SECTOR_SIZE ))
            {
               printk( KERN_ALERT DSD_TAG "Read response size mismatch: %s\n", 
                  dev->gd->disk_name );
               status = -EIO;
               kfree( msg );
               goto remove_device;
            }
         
            /* Complete request */
            #ifdef DSD_DEBUG
            printk( KERN_ALERT DSD_TAG "Read complete (success)\n" );
            #endif
            
            memcpy( req->buffer, response->data, response->bytes );

            spin_lock_irqsave( &dev->lock, irq_flags );
            {
               int end_status = end_that_request_first( req, 1, req->nr_sectors );
               BUG_ON( end_status > 0 );

               blk_queue_end_tag( dev->queue, req );
               end_that_request_last( req, 1 );
            }
            spin_unlock_irqrestore( &dev->lock, irq_flags );
         }
         break;

         default: break;
      }

      kfree( msg );
   }

   return 0;

remove_device:
   #ifdef DSD_DEBUG
   printk( KERN_ALERT DSD_TAG "Thread [%s] exiting, removing device\n", 
      dev->gd->disk_name );
   #endif

   /* dsd_remove_device() skips kthread_stop() because we are the thread */
   dsd_remove_device( dev );
   do_exit( status );
}

/**
 * Initialize module on load
 *
 * Brings up, in order: the communications layer, the bus, and the block
 * device major number.  If a later step fails, the earlier ones are shut
 * down again before the error is returned.
 *
 * NOTE(review): the unwind path calls dsd_bus_exit()/dsd_net_exit() from
 * __init code; if those are annotated __exit elsewhere this needs a look
 * for section mismatches in built-in (non-module) builds.
 *
 * @return 0 on success, the negative status of dsd_net_init() or
 *         dsd_bus_init(), or -EBUSY when no major number could be
 *         registered
 */
int __init dsd_init( void )
{
   int status;
   int major;

   dsd_devices = NULL;
   dsd_next_device = 0;

   /* Initialize communications */
   status = dsd_net_init();
   if( status < 0 )
   {
      return status;
   }

   /* Initialize bus */
   status = dsd_bus_init();
   if( status < 0 )
   {
      goto fail_net;
   }

   /*
    * Initialize devices.  register_blkdev() returns the allocated major
    * when asked for major 0, and 0 on success for an explicit major, so
    * only a negative result is an error and dsd_major is only replaced
    * when a major was dynamically allocated.
    */
   major = register_blkdev( dsd_major, DSD_DEVICE_PREFIX );
   if( major < 0 || ( dsd_major == 0 && major == 0 ) )
   {
      printk( KERN_WARNING DSD_TAG "Unable to get major number\n" );
      status = -EBUSY;
      goto fail_bus;
   }
   if( dsd_major == 0 )
   {
      dsd_major = major;
   }

   #ifndef DSD_DEBUG
   printk( KERN_ALERT DSD_TAG "Loaded, accepting queries (mj=%d)\n", 
      dsd_major );
   #else
   printk( KERN_ALERT DSD_TAG "Loaded, accepting queries (DEBUG MODE, mj=%d)\n", 
      dsd_major );
   #endif

   return 0;

   /* Unwind in reverse order of initialization */
fail_bus:
   dsd_bus_exit();
fail_net:
   dsd_net_exit();
   return status;
}

/**
 * Module unload: undo dsd_init() in reverse order.
 *
 * Individual devices are not torn down here; per the original author this
 * function is not called until all references to the module are gone.
 */
void __exit dsd_exit( void )
{
   /* Give back the block major */
   unregister_blkdev( dsd_major, DSD_DEVICE_PREFIX );

   /* Then the bus */
   dsd_bus_exit();

   /* And finally the communications layer */
   dsd_net_exit();

   printk( KERN_ALERT DSD_TAG "Unloaded\n" );
}

/**
 * Allocate a new device and add it to the device list
 */
static struct dsd_dev* dsd_alloc_dev()
{
   struct dsd_dev* dev;
   struct dsd_dev* tmp;

   dev = kmalloc( sizeof( struct dsd_dev ), GFP_KERNEL );
   memset( dev, 0, sizeof( struct dsd_dev ) );

   if( !dsd_devices )
   {
      /* First device */
      dsd_devices = dev;
   }
   else 
   {
      /* Append to list */
      for( tmp = dsd_devices; tmp->next; tmp = tmp->next );
      tmp->next = dev;
   }

   return dev;
}

/**
 * Unlink a device from the dsd_devices list and release its memory.
 *
 * The structure is wiped before being freed.  A device that is not on the
 * list is still wiped and freed.
 */
static void dsd_free_dev( struct dsd_dev* dev )
{
   struct dsd_dev** link;

   /*
    * Walk the "next" links themselves (starting with the list head) so
    * that the head and interior cases are handled by the same code.
    */
   for( link = &dsd_devices; *link; link = &(*link)->next )
   {
      if( *link == dev )
      {
         *link = dev->next;
         break;
      }
   }

   memset( dev, 0, sizeof( struct dsd_dev ) );
   kfree( dev );
}

#ifdef DSD_DEBUG
/**
 * Debug helper: print a buffer to the kernel log as a run of hex bytes.
 *
 * @param buf Buffer to dump; a NULL pointer is reported and ignored
 * @param len Number of bytes to print
 */
void dsd_hexdump(uint8_t* buf, unsigned int len)
{
   unsigned int i;

   if( !buf )
   {
      printk( KERN_ALERT DSD_TAG "hexdump: NULL parameter\n" );
      return;
   }

   /* %p keeps the full address; a cast to unsigned int truncates on 64-bit */
   printk( KERN_ALERT DSD_TAG "hexdump(0x%p):", buf );
   for( i = 0; i < len; i++ )
   {
      printk("%02x", buf[i]);
   }
   printk("\n");
}
#endif

module_init( dsd_init );
module_exit( dsd_exit );

MODULE_LICENSE( "GPL" );

