[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 2/3] cpu-throttle: implement vCPU throttle
From: |
huangy81 |
Subject: |
[PATCH v2 2/3] cpu-throttle: implement vCPU throttle |
Date: |
Fri, 19 Nov 2021 02:17:02 +0800 |
From: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
impose dirty restraint on vCPU by kicking it and sleep
as the auto-converge does during migration, but just
kick the specified vCPU instead, not all vCPUs of vm.
start a thread to track the dirtylimit status and adjust
the throttle pencentage dynamically depend on current
and quota dirtyrate.
introduce the util function in the header for dirtylimit
implementation.
Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
---
include/sysemu/cpu-throttle.h | 23 +++
softmmu/cpu-throttle.c | 319 ++++++++++++++++++++++++++++++++++++++++++
softmmu/trace-events | 5 +
3 files changed, 347 insertions(+)
diff --git a/include/sysemu/cpu-throttle.h b/include/sysemu/cpu-throttle.h
index d65bdef..726c1ce 100644
--- a/include/sysemu/cpu-throttle.h
+++ b/include/sysemu/cpu-throttle.h
@@ -65,4 +65,27 @@ bool cpu_throttle_active(void);
*/
int cpu_throttle_get_percentage(void);
+/**
+ * dirtylimit_state_init:
+ *
+ * initialize golobal state for dirtylimit
+ */
+void dirtylimit_state_init(int max_cpus);
+
+/**
+ * dirtylimit_vcpu:
+ *
+ * impose dirtylimit on vcpu util reaching the quota dirtyrate
+ */
+void dirtylimit_vcpu(int cpu_index,
+ uint64_t quota);
+/**
+ * dirtylimit_cancel_vcpu:
+ *
+ * cancel dirtylimit for the specified vcpu
+ *
+ * Returns: the number of running threads for dirtylimit
+ */
+int dirtylimit_cancel_vcpu(int cpu_index);
+
#endif /* SYSEMU_CPU_THROTTLE_H */
diff --git a/softmmu/cpu-throttle.c b/softmmu/cpu-throttle.c
index 8c2144a..a5e67c9 100644
--- a/softmmu/cpu-throttle.c
+++ b/softmmu/cpu-throttle.c
@@ -29,6 +29,8 @@
#include "qemu/main-loop.h"
#include "sysemu/cpus.h"
#include "sysemu/cpu-throttle.h"
+#include "sysemu/dirtylimit.h"
+#include "trace.h"
/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
@@ -38,6 +40,323 @@ static unsigned int throttle_percentage;
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000
+#define DIRTYLIMIT_TOLERANCE_RANGE 15 /* 15MB/s */
+
+#define DIRTYLIMIT_THROTTLE_HEAVY_WATERMARK 75
+#define DIRTYLIMIT_THROTTLE_SLIGHT_WATERMARK 90
+
+#define DIRTYLIMIT_THROTTLE_HEAVY_STEP_SIZE 5
+#define DIRTYLIMIT_THROTTLE_SLIGHT_STEP_SIZE 2
+
+typedef enum {
+ RESTRAIN_KEEP,
+ RESTRAIN_RATIO,
+ RESTRAIN_HEAVY,
+ RESTRAIN_SLIGHT,
+} RestrainPolicy;
+
+typedef struct DirtyLimitState {
+ int cpu_index;
+ bool enabled;
+ uint64_t quota; /* quota dirtyrate MB/s */
+ QemuThread thread;
+ char *name; /* thread name */
+} DirtyLimitState;
+
+struct {
+ DirtyLimitState *states;
+ int max_cpus;
+ unsigned long *bmap; /* running thread bitmap */
+ unsigned long nr;
+} *dirtylimit_state;
+
+static inline bool dirtylimit_enabled(int cpu_index)
+{
+ return qatomic_read(&dirtylimit_state->states[cpu_index].enabled);
+}
+
+static inline void dirtylimit_set_quota(int cpu_index, uint64_t quota)
+{
+ qatomic_set(&dirtylimit_state->states[cpu_index].quota, quota);
+}
+
+static inline uint64_t dirtylimit_quota(int cpu_index)
+{
+ return qatomic_read(&dirtylimit_state->states[cpu_index].quota);
+}
+
+static int64_t dirtylimit_current(int cpu_index)
+{
+ return dirtylimit_calc_current(cpu_index);
+}
+
+static void dirtylimit_vcpu_thread(CPUState *cpu, run_on_cpu_data data)
+{
+ double pct;
+ double throttle_ratio;
+ int64_t sleeptime_ns, endtime_ns;
+ int *percentage = (int *)data.host_ptr;
+
+ pct = (double)(*percentage) / 100;
+ throttle_ratio = pct / (1 - pct);
+ /* Add 1ns to fix double's rounding error (like 0.9999999...) */
+ sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
+ endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
+ while (sleeptime_ns > 0 && !cpu->stop) {
+ if (sleeptime_ns > SCALE_MS) {
+ qemu_cond_timedwait_iothread(cpu->halt_cond,
+ sleeptime_ns / SCALE_MS);
+ } else {
+ qemu_mutex_unlock_iothread();
+ g_usleep(sleeptime_ns / SCALE_US);
+ qemu_mutex_lock_iothread();
+ }
+ sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+ }
+ qatomic_set(&cpu->throttle_thread_scheduled, 0);
+
+ free(percentage);
+}
+
+static void dirtylimit_check(int cpu_index,
+ int percentage)
+{
+ CPUState *cpu;
+ int64_t sleeptime_ns, starttime_ms, currenttime_ms;
+ int *pct_parameter;
+ double pct;
+
+ pct = (double) percentage / 100;
+
+ starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+ while (true) {
+ CPU_FOREACH(cpu) {
+ if ((cpu_index == cpu->cpu_index) &&
+ (!qatomic_xchg(&cpu->throttle_thread_scheduled, 1))) {
+ pct_parameter = malloc(sizeof(*pct_parameter));
+ *pct_parameter = percentage;
+ async_run_on_cpu(cpu, dirtylimit_vcpu_thread,
+ RUN_ON_CPU_HOST_PTR(pct_parameter));
+ break;
+ }
+ }
+
+ sleeptime_ns = CPU_THROTTLE_TIMESLICE_NS / (1 - pct);
+ g_usleep(sleeptime_ns / SCALE_US);
+
+ currenttime_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ if (unlikely((currenttime_ms - starttime_ms) >
+ (DIRTYLIMIT_CALC_PERIOD_TIME_S * 1000))) {
+ break;
+ }
+ }
+}
+
+static uint64_t dirtylimit_init_pct(uint64_t quota,
+ uint64_t current)
+{
+ uint64_t limit_pct = 0;
+
+ if (quota >= current || (current == 0) ||
+ ((current - quota) <= DIRTYLIMIT_TOLERANCE_RANGE)) {
+ limit_pct = 0;
+ } else {
+ limit_pct = (current - quota) * 100 / current;
+
+ limit_pct = MIN(limit_pct,
+ DIRTYLIMIT_THROTTLE_HEAVY_WATERMARK);
+ }
+
+ return limit_pct;
+}
+
+static RestrainPolicy dirtylimit_policy(unsigned int last_pct,
+ uint64_t quota,
+ uint64_t current)
+{
+ uint64_t max, min;
+
+ max = MAX(quota, current);
+ min = MIN(quota, current);
+ if ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) {
+ return RESTRAIN_KEEP;
+ }
+ if (last_pct < DIRTYLIMIT_THROTTLE_HEAVY_WATERMARK) {
+ /* last percentage locates in [0, 75)*/
+ return RESTRAIN_RATIO;
+ } else if (last_pct < DIRTYLIMIT_THROTTLE_SLIGHT_WATERMARK) {
+ /* last percentage locates in [75, 90)*/
+ return RESTRAIN_HEAVY;
+ } else {
+ /* last percentage locates in [90, 99]*/
+ return RESTRAIN_SLIGHT;
+ }
+}
+
+static uint64_t dirtylimit_pct(unsigned int last_pct,
+ uint64_t quota,
+ uint64_t current)
+{
+ uint64_t limit_pct = 0;
+ RestrainPolicy policy;
+ bool mitigate = (quota > current) ? true : false;
+
+ if (mitigate && ((current == 0) ||
+ (last_pct <= DIRTYLIMIT_THROTTLE_SLIGHT_STEP_SIZE))) {
+ return 0;
+ }
+
+ policy = dirtylimit_policy(last_pct, quota, current);
+ switch (policy) {
+ case RESTRAIN_SLIGHT:
+ /* [90, 99] */
+ if (mitigate) {
+ limit_pct =
+ last_pct - DIRTYLIMIT_THROTTLE_SLIGHT_STEP_SIZE;
+ } else {
+ limit_pct =
+ last_pct + DIRTYLIMIT_THROTTLE_SLIGHT_STEP_SIZE;
+
+ limit_pct = MIN(limit_pct, CPU_THROTTLE_PCT_MAX);
+ }
+ break;
+ case RESTRAIN_HEAVY:
+ /* [75, 90) */
+ if (mitigate) {
+ limit_pct =
+ last_pct - DIRTYLIMIT_THROTTLE_HEAVY_STEP_SIZE;
+ } else {
+ limit_pct =
+ last_pct + DIRTYLIMIT_THROTTLE_HEAVY_STEP_SIZE;
+
+ limit_pct = MIN(limit_pct,
+ DIRTYLIMIT_THROTTLE_SLIGHT_WATERMARK);
+ }
+ break;
+ case RESTRAIN_RATIO:
+ /* [0, 75) */
+ if (mitigate) {
+ if (last_pct <= (((quota - current) * 100 / quota) / 2)) {
+ limit_pct = 0;
+ } else {
+ limit_pct = last_pct -
+ ((quota - current) * 100 / quota) / 2;
+ limit_pct = MAX(limit_pct, CPU_THROTTLE_PCT_MIN);
+ }
+ } else {
+ /*
+ * increase linearly with dirtyrate
+ * but tune a little by divide it by 2
+ */
+ limit_pct = last_pct +
+ ((current - quota) * 100 / current) / 2;
+
+ limit_pct = MIN(limit_pct,
+ DIRTYLIMIT_THROTTLE_HEAVY_WATERMARK);
+ }
+ break;
+ case RESTRAIN_KEEP:
+ default:
+ limit_pct = last_pct;
+ break;
+ }
+
+ return limit_pct;
+}
+
+static void *dirtylimit_thread(void *opaque)
+{
+ int cpu_index = *(int *)opaque;
+ uint64_t quota_dirtyrate, current_dirtyrate;
+ unsigned int last_pct = 0;
+ unsigned int pct = 0;
+
+ rcu_register_thread();
+
+ quota_dirtyrate = dirtylimit_quota(cpu_index);
+ current_dirtyrate = dirtylimit_current(cpu_index);
+
+ pct = dirtylimit_init_pct(quota_dirtyrate, current_dirtyrate);
+
+ do {
+ trace_dirtylimit_impose(cpu_index,
+ quota_dirtyrate, current_dirtyrate, pct);
+ if (pct == 0) {
+ sleep(DIRTYLIMIT_CALC_PERIOD_TIME_S);
+ } else {
+ last_pct = pct;
+ dirtylimit_check(cpu_index, pct);
+ }
+
+ quota_dirtyrate = dirtylimit_quota(cpu_index);
+ current_dirtyrate = dirtylimit_current(cpu_index);
+
+ pct = dirtylimit_pct(last_pct, quota_dirtyrate, current_dirtyrate);
+ } while (dirtylimit_enabled(cpu_index));
+
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
+int dirtylimit_cancel_vcpu(int cpu_index)
+{
+ int i;
+ int nr_threads = 0;
+
+ qatomic_set(&dirtylimit_state->states[cpu_index].enabled, 0);
+ bitmap_test_and_clear_atomic(dirtylimit_state->bmap, cpu_index, 1);
+
+ for (i = 0; i < dirtylimit_state->nr; i++) {
+ unsigned long temp = dirtylimit_state->bmap[i];
+ nr_threads += ctpopl(temp);
+ }
+
+ return nr_threads;
+}
+
+void dirtylimit_vcpu(int cpu_index,
+ uint64_t quota)
+{
+ trace_dirtylimit_vcpu(cpu_index, quota);
+
+ dirtylimit_set_quota(cpu_index, quota);
+
+ if (unlikely(!dirtylimit_enabled(cpu_index))) {
+ qatomic_set(&dirtylimit_state->states[cpu_index].enabled, 1);
+ dirtylimit_state->states[cpu_index].name =
+ g_strdup_printf("dirtylimit-%d", cpu_index);
+ qemu_thread_create(&dirtylimit_state->states[cpu_index].thread,
+ dirtylimit_state->states[cpu_index].name,
+ dirtylimit_thread,
+ (void *)&dirtylimit_state->states[cpu_index].cpu_index,
+ QEMU_THREAD_DETACHED);
+ bitmap_set_atomic(dirtylimit_state->bmap, cpu_index, 1);
+ }
+}
+
+void dirtylimit_state_init(int max_cpus)
+{
+ int i;
+
+ dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));
+
+ dirtylimit_state->states =
+ g_malloc0(sizeof(DirtyLimitState) * max_cpus);
+
+ for (i = 0; i < max_cpus; i++) {
+ dirtylimit_state->states[i].cpu_index = i;
+ }
+
+ dirtylimit_state->max_cpus = max_cpus;
+ dirtylimit_state->bmap = bitmap_new(max_cpus);
+ bitmap_clear(dirtylimit_state->bmap, 0, max_cpus);
+ dirtylimit_state->nr = BITS_TO_LONGS(max_cpus);
+
+ trace_dirtylimit_state_init(max_cpus);
+}
+
static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
double pct;
diff --git a/softmmu/trace-events b/softmmu/trace-events
index 9c88887..a7c9c04 100644
--- a/softmmu/trace-events
+++ b/softmmu/trace-events
@@ -31,3 +31,8 @@ runstate_set(int current_state, const char
*current_state_str, int new_state, co
system_wakeup_request(int reason) "reason=%d"
qemu_system_shutdown_request(int reason) "reason=%d"
qemu_system_powerdown_request(void) ""
+
+#cpu-throttle.c
+dirtylimit_state_init(int max_cpus) "dirtylimit state init: max cpus %d"
+dirtylimit_impose(int cpu_index, uint64_t quota, uint64_t current, int pct)
"CPU[%d] impose dirtylimit: quota %" PRIu64 ", current %" PRIu64 ", percentage
%d"
+dirtylimit_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set quota dirtylimit
%"PRIu64
--
1.8.3.1