msm: kgsl: Increase the timeout value for fault detection

Increase the timeout value for detecting faults from 50ms to 200ms. The fault detection mechanism is a backup to hardware fault detection, and a 50ms interval is too short: it has been observed to trigger false positives. CRs-Fixed: 672164 Change-Id: I8f382051e06bdc47f152309a2868092e1980604e Signed-off-by: Shubhraprakash Das <sadas@codeaurora.org> Signed-off-by: Tarun Karra <tkarra@codeaurora.org> Signed-off-by: Sunil Khatri <sunilkh@codeaurora.org>
author: Shubhraprakash Das <sadas@codeaurora.org> 2014-03-26 16:06:29 -0700
committer: Gaurav Gagrani <ggagrani@codeaurora.org> 2014-07-31 12:21:20 +0530
commit: 31dd557cdf82dd43358be10a0dcfe29184fdda7d (patch)
tree: 0b2a958cc370276b567b7a8808a112327f93f447
parent: 1f5f9362c63fd695e93f15ec692ae6286b2fc685 (diff)
4 files changed, 18 insertions, 17 deletions
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index b859192a650..1f76c03c73f 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -137,7 +137,7 @@ static struct adreno_device device_3d0 = {
 
 /* This set of registers are used for Hang detection
  * If the values of these registers are same after
- * KGSL_TIMEOUT_PART time, GPU hang is reported in
+ * KGSL_TIMEOUT_HANG_DETECT time, GPU hang is reported in
  * kernel log.
  * *****ALERT******ALERT********ALERT*************
  * Order of registers below is important, registers
@@ -557,7 +557,7 @@ static irqreturn_t adreno_irq_handler(struct kgsl_device *device)
 	mod_timer_pending(&device->idle_timer,
 		jiffies + device->pwrctrl.interval_timeout);
 	mod_timer_pending(&device->hang_timer,
-		(jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART)));
+		(jiffies + msecs_to_jiffies(KGSL_TIMEOUT_HANG_DETECT)));
 	return result;
 }
 
@@ -1720,7 +1720,7 @@ static int adreno_start(struct kgsl_device *device)
 	}
 
         mod_timer(&device->hang_timer,
-		(jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART)));
+		(jiffies + msecs_to_jiffies(KGSL_TIMEOUT_HANG_DETECT)));
 
 	device->reset_counter++;
 
@@ -2596,7 +2596,7 @@ adreno_dump_and_exec_ft(struct kgsl_device *device)
 			mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT);
 			mod_timer(&device->hang_timer,
 				(jiffies +
-				msecs_to_jiffies(KGSL_TIMEOUT_PART)));
+				msecs_to_jiffies(KGSL_TIMEOUT_HANG_DETECT)));
 		}
 		complete_all(&device->ft_gate);
 	}
@@ -3016,7 +3016,7 @@ static int adreno_ringbuffer_drain(struct kgsl_device *device,
 	/*
 	 * The first time into the loop, wait for 100 msecs and kick wptr again
 	 * to ensure that the hardware has updated correctly.  After that, kick
-	 * it periodically every KGSL_TIMEOUT_PART msecs until the timeout
+	 * it periodically every KGSL_TIMEOUT_HANG_DETECT msecs until the timeout
 	 * expires
 	 */
 
@@ -3028,7 +3028,7 @@ static int adreno_ringbuffer_drain(struct kgsl_device *device,
 			if (adreno_ft_detect(device, regs))
 				return -ETIMEDOUT;
 
-			wait = jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART);
+			wait = jiffies + msecs_to_jiffies(KGSL_TIMEOUT_HANG_DETECT);
 		}
 		GSL_RB_GET_READPTR(rb, &rb->rptr);
 
@@ -3063,7 +3063,7 @@ retry:
 
 	/* now, wait for the GPU to finish its operations */
 	wait_time = jiffies + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT);
-	wait_time_part = jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART);
+	wait_time_part = jiffies + msecs_to_jiffies(KGSL_TIMEOUT_HANG_DETECT);
 
 	while (time_before(jiffies, wait_time)) {
 		if (adreno_isidle(device))
@@ -3072,7 +3072,7 @@ retry:
 		/* Dont wait for timeout, detect hang faster.  */
 		if (time_after(jiffies, wait_time_part)) {
 			wait_time_part = jiffies +
-				msecs_to_jiffies(KGSL_TIMEOUT_PART);
+				msecs_to_jiffies(KGSL_TIMEOUT_HANG_DETECT);
 			if ((adreno_ft_detect(device, prev_reg_val)))
 				goto err;
 		}
@@ -3441,8 +3441,8 @@ unsigned int adreno_ft_detect(struct kgsl_device *device,
 	}
 
 	/*
-	 * Time interval between hang detection should be KGSL_TIMEOUT_PART
-	 * or more, if next hang detection is requested < KGSL_TIMEOUT_PART
+	 * Time interval between hang detection should be KGSL_TIMEOUT_HANG_DETECT
+	 * or more, if next hang detection is requested < KGSL_TIMEOUT_HANG_DETECT
 	 * from the last time do nothing.
 	 */
 	if ((next_hang_detect_time) &&
@@ -3450,7 +3450,7 @@ unsigned int adreno_ft_detect(struct kgsl_device *device,
 			return 0;
 	else
 		next_hang_detect_time = (jiffies +
-			msecs_to_jiffies(KGSL_TIMEOUT_PART-1));
+			msecs_to_jiffies(KGSL_TIMEOUT_HANG_DETECT));
 
 	/* Read the current Hang detect reg values here */
 	for (i = 0; i < FT_DETECT_REGS_COUNT; i++) {
@@ -3522,7 +3522,7 @@ unsigned int adreno_ft_detect(struct kgsl_device *device,
 
 		if (curr_context != NULL) {
 
-			curr_context->ib_gpu_time_used += KGSL_TIMEOUT_PART;
+			curr_context->ib_gpu_time_used += KGSL_TIMEOUT_HANG_DETECT;
 			KGSL_FT_INFO(device,
 			"Proc %s used GPU Time %d ms on timestamp 0x%X\n",
 			curr_context->pid_name, curr_context->ib_gpu_time_used,
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index 2da190a3364..7723175358d 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -63,7 +63,7 @@ static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry);
  * kgsl_hang_check() - Check for GPU hang
  * data: KGSL device structure
  *
- * This function is called every KGSL_TIMEOUT_PART time when
+ * This function is called every KGSL_TIMEOUT_HANG_DETECT time when
  * GPU is active to check for hang. If a hang is detected we
  * trigger fault tolerance.
  */
@@ -82,7 +82,7 @@ void kgsl_hang_check(struct work_struct *work)
 			adreno_dump_and_exec_ft(device);
 
 		mod_timer(&device->hang_timer,
-			(jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART)));
+			(jiffies + msecs_to_jiffies(KGSL_TIMEOUT_HANG_DETECT)));
 	}
 
 	mutex_unlock(&device->mutex);
@@ -95,7 +95,7 @@ void kgsl_hang_check(struct work_struct *work)
  * This function is called when hang timer expires, in this
  * function we check if GPU is in active state and queue the
  * work on device workqueue to check for the hang. We restart
- * the timer after KGSL_TIMEOUT_PART time.
+ * the timer after KGSL_TIMEOUT_HANG_DETECT time.
  */
 void hang_timer(unsigned long data)
 {
diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h
index 8b08b4686b4..59d19f68582 100644
--- a/drivers/gpu/msm/kgsl_device.h
+++ b/drivers/gpu/msm/kgsl_device.h
@@ -28,6 +28,7 @@
 #define KGSL_TIMEOUT_DEFAULT        0xFFFFFFFF
 #define KGSL_TIMEOUT_PART           50 /* 50 msec */
 #define KGSL_TIMEOUT_LONG_IB_DETECTION  2000 /* 2 sec*/
+#define KGSL_TIMEOUT_HANG_DETECT	200	/* 200 msec */
 
 #define FIRST_TIMEOUT (HZ / 2)
 
diff --git a/drivers/gpu/msm/kgsl_pwrctrl.c b/drivers/gpu/msm/kgsl_pwrctrl.c
index 26454db3d1b..323da1b0f3b 100644
--- a/drivers/gpu/msm/kgsl_pwrctrl.c
+++ b/drivers/gpu/msm/kgsl_pwrctrl.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2010-2012,2014 The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -1287,7 +1287,7 @@ int kgsl_pwrctrl_wake(struct kgsl_device *device)
 		mod_timer(&device->idle_timer,
 				jiffies + device->pwrctrl.interval_timeout);
 		mod_timer(&device->hang_timer,
-			(jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART)));
+			(jiffies + msecs_to_jiffies(KGSL_TIMEOUT_HANG_DETECT)));
 		pm_qos_update_request(&device->pm_qos_req_dma,
 					GPU_SWFI_LATENCY);
 	case KGSL_STATE_ACTIVE:
author	Shubhraprakash Das <sadas@codeaurora.org>	2014-03-26 16:06:29 -0700
committer	Gaurav Gagrani <ggagrani@codeaurora.org>	2014-07-31 12:21:20 +0530
commit	31dd557cdf82dd43358be10a0dcfe29184fdda7d (patch)
tree	0b2a958cc370276b567b7a8808a112327f93f447
parent	1f5f9362c63fd695e93f15ec692ae6286b2fc685 (diff)