From ce5997b8161909161f443228256bdb643c81abcf Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Tue, 28 Jul 2015 15:46:47 +0200 Subject: [PATCH 02/28] migration: move ram stuff to migration/ram Message-id: <1438098431-30847-3-git-send-email-quintela@redhat.com> Patchwork-id: 67160 O-Subject: [RHEL-7 qemu-kvm PATCH 02/26] migration: move ram stuff to migration/ram Bugzilla: 580006 RH-Acked-by: Alex Williamson RH-Acked-by: Amit Shah RH-Acked-by: Dr. David Alan Gilbert For historic reasons, ram migration have been on arch_init.c. Just split it into migration/ram.c, the same that happened with block.c. There is only code movement, no changes altogether. Signed-off-by: Juan Quintela Reviewed-by: Eric Blake (cherry picked from commit 56e93d26b85bac76b93211393163c2ebcdee9481) Signed-off-by: Miroslav Rezanina Conflicts: Makefile.target arch_init.c include/migration/migration.h No compression code. Signed-off-by: Juan Quintela --- MAINTAINERS | 1 - Makefile.target | 1 + arch_init.c | 1170 --------------------------------------- include/migration/migration.h | 1 + include/sysemu/arch_init.h | 1 - migration/ram.c | 1220 +++++++++++++++++++++++++++++++++++++++++ trace-events | 2 +- 7 files changed, 1223 insertions(+), 1173 deletions(-) create mode 100644 migration/ram.c diff --git a/MAINTAINERS b/MAINTAINERS index a8ec9be..ec1d6af 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -985,7 +985,6 @@ S: Maintained F: include/migration/ F: migration/ F: savevm.c -F: arch_init.c F: scripts/vmstate-static-checker.py F: tests/vmstate-static-checker-data/ diff --git a/Makefile.target b/Makefile.target index 2262d89..729ac57 100644 --- a/Makefile.target +++ b/Makefile.target @@ -134,6 +134,7 @@ obj-$(CONFIG_KVM) += kvm-all.o obj-y += memory.o savevm.o cputlb.o obj-y += memory_mapping.o obj-y += dump.o +obj-y += migration/ram.o LIBS+=$(libs_softmmu) # xen support diff --git a/arch_init.c b/arch_init.c index 4ae486f..2f38359 100644 --- a/arch_init.c +++ b/arch_init.c @@ -54,14 +54,6 @@ #include "qemu/host-utils.h" #include "qemu/rcu_queue.h" -#ifdef DEBUG_ARCH_INIT -#define DPRINTF(fmt, ...) \ - do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) \ - do { } while (0) -#endif - #ifdef TARGET_SPARC int graphic_width = 1024; int graphic_height = 768; @@ -110,23 +102,6 @@ int graphic_depth = 32; #endif const uint32_t arch_type = QEMU_ARCH; -static bool mig_throttle_on; -static int dirty_rate_high_cnt; -static void check_guest_throttling(void); - -static uint64_t bitmap_sync_count; - -/***********************************************************/ -/* ram save/restore */ - -#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */ -#define RAM_SAVE_FLAG_COMPRESS 0x02 -#define RAM_SAVE_FLAG_MEM_SIZE 0x04 -#define RAM_SAVE_FLAG_PAGE 0x08 -#define RAM_SAVE_FLAG_EOS 0x10 -#define RAM_SAVE_FLAG_CONTINUE 0x20 -#define RAM_SAVE_FLAG_XBZRLE 0x40 -/* 0x80 is reserved in migration.h start with 0x100 next */ static struct defconfig_file { const char *filename; @@ -138,8 +113,6 @@ static struct defconfig_file { { NULL }, /* end of list */ }; -static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE]; - int qemu_read_default_config_files(bool userconfig) { int ret; @@ -158,1100 +131,6 @@ int qemu_read_default_config_files(bool userconfig) return 0; } -static inline bool is_zero_range(uint8_t *p, uint64_t size) -{ - return buffer_find_nonzero_offset(p, size) == size; -} - -/* struct contains XBZRLE cache and a static page - used by the compression */ -static struct { - /* buffer used for XBZRLE encoding */ - uint8_t *encoded_buf; - /* buffer for storing page content */ - uint8_t *current_buf; - /* Cache for XBZRLE, Protected by lock. */ - PageCache *cache; - QemuMutex lock; -} XBZRLE; - -/* buffer used for XBZRLE decoding */ -static uint8_t *xbzrle_decoded_buf; - -static void XBZRLE_cache_lock(void) -{ - if (migrate_use_xbzrle()) - qemu_mutex_lock(&XBZRLE.lock); -} - -static void XBZRLE_cache_unlock(void) -{ - if (migrate_use_xbzrle()) - qemu_mutex_unlock(&XBZRLE.lock); -} - -/* - * called from qmp_migrate_set_cache_size in main thread, possibly while - * a migration is in progress. - * A running migration maybe using the cache and might finish during this - * call, hence changes to the cache are protected by XBZRLE.lock(). - */ -int64_t xbzrle_cache_resize(int64_t new_size) -{ - PageCache *new_cache; - int64_t ret; - - if (new_size < TARGET_PAGE_SIZE) { - return -1; - } - - XBZRLE_cache_lock(); - - if (XBZRLE.cache != NULL) { - if (pow2floor(new_size) == migrate_xbzrle_cache_size()) { - goto out_new_size; - } - new_cache = cache_init(new_size / TARGET_PAGE_SIZE, - TARGET_PAGE_SIZE); - if (!new_cache) { - error_report("Error creating cache"); - ret = -1; - goto out; - } - - cache_fini(XBZRLE.cache); - XBZRLE.cache = new_cache; - } - -out_new_size: - ret = pow2floor(new_size); -out: - XBZRLE_cache_unlock(); - return ret; -} - -/* accounting for migration statistics */ -typedef struct AccountingInfo { - uint64_t dup_pages; - uint64_t skipped_pages; - uint64_t norm_pages; - uint64_t iterations; - uint64_t xbzrle_bytes; - uint64_t xbzrle_pages; - uint64_t xbzrle_cache_miss; - double xbzrle_cache_miss_rate; - uint64_t xbzrle_overflows; -} AccountingInfo; - -static AccountingInfo acct_info; - -static void acct_clear(void) -{ - memset(&acct_info, 0, sizeof(acct_info)); -} - -uint64_t dup_mig_bytes_transferred(void) -{ - return acct_info.dup_pages * TARGET_PAGE_SIZE; -} - -uint64_t dup_mig_pages_transferred(void) -{ - return acct_info.dup_pages; -} - -uint64_t skipped_mig_bytes_transferred(void) -{ - return acct_info.skipped_pages * TARGET_PAGE_SIZE; -} - -uint64_t skipped_mig_pages_transferred(void) -{ - return acct_info.skipped_pages; -} - -uint64_t norm_mig_bytes_transferred(void) -{ - return acct_info.norm_pages * TARGET_PAGE_SIZE; -} - -uint64_t norm_mig_pages_transferred(void) -{ - return acct_info.norm_pages; -} - -uint64_t xbzrle_mig_bytes_transferred(void) -{ - return acct_info.xbzrle_bytes; -} - -uint64_t xbzrle_mig_pages_transferred(void) -{ - return acct_info.xbzrle_pages; -} - -uint64_t xbzrle_mig_pages_cache_miss(void) -{ - return acct_info.xbzrle_cache_miss; -} - -double xbzrle_mig_cache_miss_rate(void) -{ - return acct_info.xbzrle_cache_miss_rate; -} - -uint64_t xbzrle_mig_pages_overflow(void) -{ - return acct_info.xbzrle_overflows; -} - -/* This is the last block that we have visited serching for dirty pages - */ -static RAMBlock *last_seen_block; -/* This is the last block from where we have sent data */ -static RAMBlock *last_sent_block; -static ram_addr_t last_offset; -static unsigned long *migration_bitmap; -static uint64_t migration_dirty_pages; -static uint32_t last_version; -static bool ram_bulk_stage; - -/** - * save_page_header: Write page header to wire - * - * If this is the 1st block, it also writes the block identification - * - * Returns: Number of bytes written - * - * @f: QEMUFile where to send the data - * @block: block that contains the page we want to send - * @offset: offset inside the block for the page - * in the lower bits, it contains flags - */ -static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset) -{ - size_t size; - - qemu_put_be64(f, offset); - size = 8; - - if (!(offset & RAM_SAVE_FLAG_CONTINUE)) { - qemu_put_byte(f, strlen(block->idstr)); - qemu_put_buffer(f, (uint8_t *)block->idstr, - strlen(block->idstr)); - size += 1 + strlen(block->idstr); - } - return size; -} - -/* Update the xbzrle cache to reflect a page that's been sent as all 0. - * The important thing is that a stale (not-yet-0'd) page be replaced - * by the new data. - * As a bonus, if the page wasn't in the cache it gets added so that - * when a small write is made into the 0'd page it gets XBZRLE sent - */ -static void xbzrle_cache_zero_page(ram_addr_t current_addr) -{ - if (ram_bulk_stage || !migrate_use_xbzrle()) { - return; - } - - /* We don't care if this fails to allocate a new cache page - * as long as it updated an old one */ - cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE, - bitmap_sync_count); -} - -#define ENCODING_FLAG_XBZRLE 0x1 - -/** - * save_xbzrle_page: compress and send current page - * - * Returns: 1 means that we wrote the page - * 0 means that page is identical to the one already sent - * -1 means that xbzrle would be longer than normal - * - * @f: QEMUFile where to send the data - * @current_data: - * @current_addr: - * @block: block that contains the page we want to send - * @offset: offset inside the block for the page - * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes - */ -static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, - ram_addr_t current_addr, RAMBlock *block, - ram_addr_t offset, bool last_stage, - uint64_t *bytes_transferred) -{ - int encoded_len = 0, bytes_xbzrle; - uint8_t *prev_cached_page; - - if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) { - acct_info.xbzrle_cache_miss++; - if (!last_stage) { - if (cache_insert(XBZRLE.cache, current_addr, *current_data, - bitmap_sync_count) == -1) { - return -1; - } else { - /* update *current_data when the page has been - inserted into cache */ - *current_data = get_cached_data(XBZRLE.cache, current_addr); - } - } - return -1; - } - - prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); - - /* save current buffer into memory */ - memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE); - - /* XBZRLE encoding (if there is no overflow) */ - encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, - TARGET_PAGE_SIZE, XBZRLE.encoded_buf, - TARGET_PAGE_SIZE); - if (encoded_len == 0) { - DPRINTF("Skipping unmodified page\n"); - return 0; - } else if (encoded_len == -1) { - DPRINTF("Overflow\n"); - acct_info.xbzrle_overflows++; - /* update data in the cache */ - if (!last_stage) { - memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE); - *current_data = prev_cached_page; - } - return -1; - } - - /* we need to update the data in the cache, in order to get the same data */ - if (!last_stage) { - memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); - } - - /* Send XBZRLE based compressed page */ - bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE); - qemu_put_byte(f, ENCODING_FLAG_XBZRLE); - qemu_put_be16(f, encoded_len); - qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); - bytes_xbzrle += encoded_len + 1 + 2; - acct_info.xbzrle_pages++; - acct_info.xbzrle_bytes += bytes_xbzrle; - *bytes_transferred += bytes_xbzrle; - - return 1; -} - -static inline -ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, - ram_addr_t start) -{ - unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS; - unsigned long nr = base + (start >> TARGET_PAGE_BITS); - uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr)); - unsigned long size = base + (mr_size >> TARGET_PAGE_BITS); - - unsigned long next; - - if (ram_bulk_stage && nr > base) { - next = nr + 1; - } else { - next = find_next_bit(migration_bitmap, size, nr); - } - - if (next < size) { - clear_bit(next, migration_bitmap); - migration_dirty_pages--; - } - return (next - base) << TARGET_PAGE_BITS; -} - -static inline bool migration_bitmap_set_dirty(ram_addr_t addr) -{ - bool ret; - int nr = addr >> TARGET_PAGE_BITS; - - ret = test_and_set_bit(nr, migration_bitmap); - - if (!ret) { - migration_dirty_pages++; - } - return ret; -} - -static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length) -{ - ram_addr_t addr; - unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); - - /* start address is aligned at the start of a word? */ - if (((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) { - int k; - int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS); - unsigned long *src = ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION]; - - for (k = page; k < page + nr; k++) { - if (src[k]) { - unsigned long new_dirty; - new_dirty = ~migration_bitmap[k]; - migration_bitmap[k] |= src[k]; - new_dirty &= src[k]; - migration_dirty_pages += ctpopl(new_dirty); - src[k] = 0; - } - } - } else { - for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) { - if (cpu_physical_memory_get_dirty(start + addr, - TARGET_PAGE_SIZE, - DIRTY_MEMORY_MIGRATION)) { - cpu_physical_memory_reset_dirty(start + addr, - TARGET_PAGE_SIZE, - DIRTY_MEMORY_MIGRATION); - migration_bitmap_set_dirty(start + addr); - } - } - } -} - - -/* Fix me: there are too many global variables used in migration process. */ -static int64_t start_time; -static int64_t bytes_xfer_prev; -static int64_t num_dirty_pages_period; -static uint64_t xbzrle_cache_miss_prev; -static uint64_t iterations_prev; - -static void migration_bitmap_sync_init(void) -{ - start_time = 0; - bytes_xfer_prev = 0; - num_dirty_pages_period = 0; - xbzrle_cache_miss_prev = 0; - iterations_prev = 0; -} - -/* Called with iothread lock held, to protect ram_list.dirty_memory[] */ -static void migration_bitmap_sync(void) -{ - RAMBlock *block; - uint64_t num_dirty_pages_init = migration_dirty_pages; - MigrationState *s = migrate_get_current(); - int64_t end_time; - int64_t bytes_xfer_now; - - bitmap_sync_count++; - - if (!bytes_xfer_prev) { - bytes_xfer_prev = ram_bytes_transferred(); - } - - if (!start_time) { - start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - } - - trace_migration_bitmap_sync_start(); - address_space_sync_dirty_bitmap(&address_space_memory); - - rcu_read_lock(); - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { - migration_bitmap_sync_range(block->mr->ram_addr, block->used_length); - } - rcu_read_unlock(); - - trace_migration_bitmap_sync_end(migration_dirty_pages - - num_dirty_pages_init); - num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init; - end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - - /* more than 1 second = 1000 millisecons */ - if (end_time > start_time + 1000) { - if (migrate_auto_converge()) { - /* The following detection logic can be refined later. For now: - Check to see if the dirtied bytes is 50% more than the approx. - amount of bytes that just got transferred since the last time we - were in this routine. If that happens >N times (for now N==4) - we turn on the throttle down logic */ - bytes_xfer_now = ram_bytes_transferred(); - if (s->dirty_pages_rate && - (num_dirty_pages_period * TARGET_PAGE_SIZE > - (bytes_xfer_now - bytes_xfer_prev)/2) && - (dirty_rate_high_cnt++ > 4)) { - trace_migration_throttle(); - mig_throttle_on = true; - dirty_rate_high_cnt = 0; - } - bytes_xfer_prev = bytes_xfer_now; - } else { - mig_throttle_on = false; - } - if (migrate_use_xbzrle()) { - if (iterations_prev != acct_info.iterations) { - acct_info.xbzrle_cache_miss_rate = - (double)(acct_info.xbzrle_cache_miss - - xbzrle_cache_miss_prev) / - (acct_info.iterations - iterations_prev); - } - iterations_prev = acct_info.iterations; - xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss; - } - s->dirty_pages_rate = num_dirty_pages_period * 1000 - / (end_time - start_time); - s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE; - start_time = end_time; - num_dirty_pages_period = 0; - s->dirty_sync_count = bitmap_sync_count; - } -} - -/** - * ram_save_page: Send the given page to the stream - * - * Returns: Number of pages written. - * - * @f: QEMUFile where to send the data - * @block: block that contains the page we want to send - * @offset: offset inside the block for the page - * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes - */ -static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, - bool last_stage, uint64_t *bytes_transferred) -{ - int pages = -1; - uint64_t bytes_xmit; - ram_addr_t current_addr; - MemoryRegion *mr = block->mr; - uint8_t *p; - int ret; - bool send_async = true; - - p = memory_region_get_ram_ptr(mr) + offset; - - /* In doubt sent page as normal */ - bytes_xmit = 0; - ret = ram_control_save_page(f, block->offset, - offset, TARGET_PAGE_SIZE, &bytes_xmit); - if (bytes_xmit) { - *bytes_transferred += bytes_xmit; - pages = 1; - } - - XBZRLE_cache_lock(); - - current_addr = block->offset + offset; - - if (block == last_sent_block) { - offset |= RAM_SAVE_FLAG_CONTINUE; - } - if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { - if (ret != RAM_SAVE_CONTROL_DELAYED) { - if (bytes_xmit > 0) { - acct_info.norm_pages++; - } else if (bytes_xmit == 0) { - acct_info.dup_pages++; - } - } - } else if (is_zero_range(p, TARGET_PAGE_SIZE)) { - acct_info.dup_pages++; - *bytes_transferred += save_page_header(f, block, - offset | RAM_SAVE_FLAG_COMPRESS); - qemu_put_byte(f, 0); - *bytes_transferred += 1; - pages = 1; - /* Must let xbzrle know, otherwise a previous (now 0'd) cached - * page would be stale - */ - xbzrle_cache_zero_page(current_addr); - } else if (!ram_bulk_stage && migrate_use_xbzrle()) { - pages = save_xbzrle_page(f, &p, current_addr, block, - offset, last_stage, bytes_transferred); - if (!last_stage) { - /* Can't send this cached data async, since the cache page - * might get updated before it gets to the wire - */ - send_async = false; - } - } - - /* XBZRLE overflow or normal page */ - if (pages == -1) { - *bytes_transferred += save_page_header(f, block, - offset | RAM_SAVE_FLAG_PAGE); - if (send_async) { - qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE); - } else { - qemu_put_buffer(f, p, TARGET_PAGE_SIZE); - } - *bytes_transferred += TARGET_PAGE_SIZE; - pages = 1; - acct_info.norm_pages++; - } - - XBZRLE_cache_unlock(); - - return pages; -} - -/** - * ram_find_and_save_block: Finds a dirty page and sends it to f - * - * Called within an RCU critical section. - * - * Returns: The number of pages written - * 0 means no dirty pages - * - * @f: QEMUFile where to send the data - * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes - */ - -static int ram_find_and_save_block(QEMUFile *f, bool last_stage, - uint64_t *bytes_transferred) -{ - RAMBlock *block = last_seen_block; - ram_addr_t offset = last_offset; - bool complete_round = false; - int pages = 0; - MemoryRegion *mr; - - if (!block) - block = QLIST_FIRST_RCU(&ram_list.blocks); - - while (true) { - mr = block->mr; - offset = migration_bitmap_find_and_reset_dirty(mr, offset); - if (complete_round && block == last_seen_block && - offset >= last_offset) { - break; - } - if (offset >= block->used_length) { - offset = 0; - block = QLIST_NEXT_RCU(block, next); - if (!block) { - block = QLIST_FIRST_RCU(&ram_list.blocks); - complete_round = true; - ram_bulk_stage = false; - } - } else { - pages = ram_save_page(f, block, offset, last_stage, - bytes_transferred); - - /* if page is unmodified, continue to the next */ - if (pages > 0) { - last_sent_block = block; - break; - } - } - } - - last_seen_block = block; - last_offset = offset; - - return pages; -} - -static uint64_t bytes_transferred; - -void acct_update_position(QEMUFile *f, size_t size, bool zero) -{ - uint64_t pages = size / TARGET_PAGE_SIZE; - if (zero) { - acct_info.dup_pages += pages; - } else { - acct_info.norm_pages += pages; - bytes_transferred += size; - qemu_update_position(f, size); - } -} - -static ram_addr_t ram_save_remaining(void) -{ - return migration_dirty_pages; -} - -uint64_t ram_bytes_remaining(void) -{ - return ram_save_remaining() * TARGET_PAGE_SIZE; -} - -uint64_t ram_bytes_transferred(void) -{ - return bytes_transferred; -} - -uint64_t ram_bytes_total(void) -{ - RAMBlock *block; - uint64_t total = 0; - - rcu_read_lock(); - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) - total += block->used_length; - rcu_read_unlock(); - return total; -} - -void free_xbzrle_decoded_buf(void) -{ - g_free(xbzrle_decoded_buf); - xbzrle_decoded_buf = NULL; -} - -static void migration_end(void) -{ - if (migration_bitmap) { - memory_global_dirty_log_stop(); - g_free(migration_bitmap); - migration_bitmap = NULL; - } - - XBZRLE_cache_lock(); - if (XBZRLE.cache) { - cache_fini(XBZRLE.cache); - g_free(XBZRLE.encoded_buf); - g_free(XBZRLE.current_buf); - XBZRLE.cache = NULL; - XBZRLE.encoded_buf = NULL; - XBZRLE.current_buf = NULL; - } - XBZRLE_cache_unlock(); -} - -static void ram_migration_cancel(void *opaque) -{ - migration_end(); -} - -static void reset_ram_globals(void) -{ - last_seen_block = NULL; - last_sent_block = NULL; - last_offset = 0; - last_version = ram_list.version; - ram_bulk_stage = true; -} - -#define MAX_WAIT 50 /* ms, half buffered_file limit */ - - -/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has - * long-running RCU critical section. When rcu-reclaims in the code - * start to become numerous it will be necessary to reduce the - * granularity of these critical sections. - */ - -static int ram_save_setup(QEMUFile *f, void *opaque) -{ - RAMBlock *block; - int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */ - - mig_throttle_on = false; - dirty_rate_high_cnt = 0; - bitmap_sync_count = 0; - migration_bitmap_sync_init(); - - if (migrate_use_xbzrle()) { - XBZRLE_cache_lock(); - XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / - TARGET_PAGE_SIZE, - TARGET_PAGE_SIZE); - if (!XBZRLE.cache) { - XBZRLE_cache_unlock(); - error_report("Error creating cache"); - return -1; - } - XBZRLE_cache_unlock(); - - /* We prefer not to abort if there is no memory */ - XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE); - if (!XBZRLE.encoded_buf) { - error_report("Error allocating encoded_buf"); - return -1; - } - - XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE); - if (!XBZRLE.current_buf) { - error_report("Error allocating current_buf"); - g_free(XBZRLE.encoded_buf); - XBZRLE.encoded_buf = NULL; - return -1; - } - - acct_clear(); - } - - /* iothread lock needed for ram_list.dirty_memory[] */ - qemu_mutex_lock_iothread(); - qemu_mutex_lock_ramlist(); - rcu_read_lock(); - bytes_transferred = 0; - reset_ram_globals(); - - ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS; - migration_bitmap = bitmap_new(ram_bitmap_pages); - bitmap_set(migration_bitmap, 0, ram_bitmap_pages); - - /* - * Count the total number of pages used by ram blocks not including any - * gaps due to alignment or unplugs. - */ - migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; - - memory_global_dirty_log_start(); - migration_bitmap_sync(); - qemu_mutex_unlock_ramlist(); - qemu_mutex_unlock_iothread(); - - qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); - - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { - qemu_put_byte(f, strlen(block->idstr)); - qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); - qemu_put_be64(f, block->used_length); - } - - rcu_read_unlock(); - - ram_control_before_iterate(f, RAM_CONTROL_SETUP); - ram_control_after_iterate(f, RAM_CONTROL_SETUP); - - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - - return 0; -} - -static int ram_save_iterate(QEMUFile *f, void *opaque) -{ - int ret; - int i; - int64_t t0; - int pages_sent = 0; - - rcu_read_lock(); - if (ram_list.version != last_version) { - reset_ram_globals(); - } - - /* Read version before ram_list.blocks */ - smp_rmb(); - - ram_control_before_iterate(f, RAM_CONTROL_ROUND); - - t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - i = 0; - while ((ret = qemu_file_rate_limit(f)) == 0) { - int pages; - - pages = ram_find_and_save_block(f, false, &bytes_transferred); - /* no more pages to sent */ - if (pages == 0) { - break; - } - pages_sent += pages; - acct_info.iterations++; - check_guest_throttling(); - /* we want to check in the 1st loop, just in case it was the 1st time - and we had to sync the dirty bitmap. - qemu_get_clock_ns() is a bit expensive, so we only check each some - iterations - */ - if ((i & 63) == 0) { - uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000; - if (t1 > MAX_WAIT) { - DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n", - t1, i); - break; - } - } - i++; - } - rcu_read_unlock(); - - /* - * Must occur before EOS (or any QEMUFile operation) - * because of RDMA protocol. - */ - ram_control_after_iterate(f, RAM_CONTROL_ROUND); - - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - bytes_transferred += 8; - - ret = qemu_file_get_error(f); - if (ret < 0) { - return ret; - } - - return pages_sent; -} - -/* Called with iothread lock */ -static int ram_save_complete(QEMUFile *f, void *opaque) -{ - rcu_read_lock(); - - migration_bitmap_sync(); - - ram_control_before_iterate(f, RAM_CONTROL_FINISH); - - /* try transferring iterative blocks of memory */ - - /* flush all remaining blocks regardless of rate limiting */ - while (true) { - int pages; - - pages = ram_find_and_save_block(f, true, &bytes_transferred); - /* no more blocks to sent */ - if (pages == 0) { - break; - } - } - - ram_control_after_iterate(f, RAM_CONTROL_FINISH); - migration_end(); - - rcu_read_unlock(); - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - - return 0; -} - -static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) -{ - uint64_t remaining_size; - - remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; - - if (remaining_size < max_size) { - qemu_mutex_lock_iothread(); - rcu_read_lock(); - migration_bitmap_sync(); - rcu_read_unlock(); - qemu_mutex_unlock_iothread(); - remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; - } - return remaining_size; -} - -static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) -{ - unsigned int xh_len; - int xh_flags; - - if (!xbzrle_decoded_buf) { - xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE); - } - - /* extract RLE header */ - xh_flags = qemu_get_byte(f); - xh_len = qemu_get_be16(f); - - if (xh_flags != ENCODING_FLAG_XBZRLE) { - error_report("Failed to load XBZRLE page - wrong compression!"); - return -1; - } - - if (xh_len > TARGET_PAGE_SIZE) { - error_report("Failed to load XBZRLE page - len overflow!"); - return -1; - } - /* load data and decode */ - qemu_get_buffer(f, xbzrle_decoded_buf, xh_len); - - /* decode RLE */ - if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host, - TARGET_PAGE_SIZE) == -1) { - error_report("Failed to load XBZRLE page - decode error!"); - return -1; - } - - return 0; -} - -/* Must be called from within a rcu critical section. - * Returns a pointer from within the RCU-protected ram_list. - */ -static inline void *host_from_stream_offset(QEMUFile *f, - ram_addr_t offset, - int flags) -{ - static RAMBlock *block = NULL; - char id[256]; - uint8_t len; - - if (flags & RAM_SAVE_FLAG_CONTINUE) { - if (!block || block->max_length <= offset) { - error_report("Ack, bad migration stream!"); - return NULL; - } - - return memory_region_get_ram_ptr(block->mr) + offset; - } - - len = qemu_get_byte(f); - qemu_get_buffer(f, (uint8_t *)id, len); - id[len] = 0; - - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { - if (!strncmp(id, block->idstr, sizeof(id)) && - block->max_length > offset) { - return memory_region_get_ram_ptr(block->mr) + offset; - } - } - - error_report("Can't find block %s!", id); - return NULL; -} - -/* - * If a page (or a whole RDMA chunk) has been - * determined to be zero, then zap it. - */ -void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) -{ - if (ch != 0 || !is_zero_range(host, size)) { - memset(host, ch, size); - } -} - -static int ram_load(QEMUFile *f, void *opaque, int version_id) -{ - int flags = 0, ret = 0; - static uint64_t seq_iter; - - seq_iter++; - - if (version_id != 4) { - ret = -EINVAL; - } - - /* This RCU critical section can be very long running. - * When RCU reclaims in the code start to become numerous, - * it will be necessary to reduce the granularity of this - * critical section. - */ - rcu_read_lock(); - while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { - ram_addr_t addr, total_ram_bytes; - void *host; - uint8_t ch; - - addr = qemu_get_be64(f); - flags = addr & ~TARGET_PAGE_MASK; - addr &= TARGET_PAGE_MASK; - - switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { - case RAM_SAVE_FLAG_MEM_SIZE: - /* Synchronize RAM block list */ - total_ram_bytes = addr; - while (!ret && total_ram_bytes) { - RAMBlock *block; - uint8_t len; - char id[256]; - ram_addr_t length; - - len = qemu_get_byte(f); - qemu_get_buffer(f, (uint8_t *)id, len); - id[len] = 0; - length = qemu_get_be64(f); - - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { - if (!strncmp(id, block->idstr, sizeof(id))) { - if (length != block->used_length) { - Error *local_err = NULL; - - ret = qemu_ram_resize(block->offset, length, &local_err); - if (local_err) { - error_report_err(local_err); - } - } - ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, - block->idstr); - break; - } - } - - if (!block) { - error_report("Unknown ramblock \"%s\", cannot " - "accept migration", id); - ret = -EINVAL; - } - - total_ram_bytes -= length; - } - break; - case RAM_SAVE_FLAG_COMPRESS: - host = host_from_stream_offset(f, addr, flags); - if (!host) { - error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); - ret = -EINVAL; - break; - } - ch = qemu_get_byte(f); - ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); - break; - case RAM_SAVE_FLAG_PAGE: - host = host_from_stream_offset(f, addr, flags); - if (!host) { - error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); - ret = -EINVAL; - break; - } - qemu_get_buffer(f, host, TARGET_PAGE_SIZE); - break; - case RAM_SAVE_FLAG_XBZRLE: - host = host_from_stream_offset(f, addr, flags); - if (!host) { - error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); - ret = -EINVAL; - break; - } - if (load_xbzrle(f, addr, host) < 0) { - error_report("Failed to decompress XBZRLE page at " - RAM_ADDR_FMT, addr); - ret = -EINVAL; - break; - } - break; - case RAM_SAVE_FLAG_EOS: - /* normal exit */ - break; - default: - if (flags & RAM_SAVE_FLAG_HOOK) { - ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL); - } else { - error_report("Unknown combination of migration flags: %#x", - flags); - ret = -EINVAL; - } - } - if (!ret) { - ret = qemu_file_get_error(f); - } - } - - rcu_read_unlock(); - DPRINTF("Completed load of VM with exit code %d seq iteration " - "%" PRIu64 "\n", ret, seq_iter); - return ret; -} - -static SaveVMHandlers savevm_ram_handlers = { - .save_live_setup = ram_save_setup, - .save_live_iterate = ram_save_iterate, - .save_live_complete = ram_save_complete, - .save_live_pending = ram_save_pending, - .load_state = ram_load, - .cancel = ram_migration_cancel, -}; - -void ram_mig_init(void) -{ - qemu_mutex_init(&XBZRLE.lock); - register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL); -} - struct soundhw { const char *name; const char *descr; @@ -1451,52 +330,3 @@ TargetInfo *qmp_query_target(Error **errp) return info; } - -/* Stub function that's gets run on the vcpu when its brought out of the - VM to run inside qemu via async_run_on_cpu()*/ -static void mig_sleep_cpu(void *opq) -{ - qemu_mutex_unlock_iothread(); - g_usleep(30*1000); - qemu_mutex_lock_iothread(); -} - -/* To reduce the dirty rate explicitly disallow the VCPUs from spending - much time in the VM. The migration thread will try to catchup. - Workload will experience a performance drop. -*/ -static void mig_throttle_guest_down(void) -{ - CPUState *cpu; - - qemu_mutex_lock_iothread(); - CPU_FOREACH(cpu) { - async_run_on_cpu(cpu, mig_sleep_cpu, NULL); - } - qemu_mutex_unlock_iothread(); -} - -static void check_guest_throttling(void) -{ - static int64_t t0; - int64_t t1; - - if (!mig_throttle_on) { - return; - } - - if (!t0) { - t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - return; - } - - t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - - /* If it has been more than 40 ms since the last time the guest - * was throttled then do it again. - */ - if (40 < (t1-t0)/1000000) { - mig_throttle_guest_down(); - t0 = t1; - } -} diff --git a/include/migration/migration.h b/include/migration/migration.h index b901d0f..b89d0b8 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -170,6 +170,7 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, ram_addr_t offset, size_t size, uint64_t *bytes_sent); +void ram_mig_init(void); /* * Disables a load of subsections that were added in 2.2/rh7.2 for backwards diff --git a/include/sysemu/arch_init.h b/include/sysemu/arch_init.h index 54b36c1..c38892f 100644 --- a/include/sysemu/arch_init.h +++ b/include/sysemu/arch_init.h @@ -30,7 +30,6 @@ extern const uint32_t arch_type; void select_soundhw(const char *optarg); void do_acpitable_option(const QemuOpts *opts); void do_smbios_option(QemuOpts *opts); -void ram_mig_init(void); void cpudef_init(void); void audio_init(void); int kvm_available(void); diff --git a/migration/ram.c b/migration/ram.c new file mode 100644 index 0000000..8548d26 --- /dev/null +++ b/migration/ram.c @@ -0,0 +1,1220 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#ifndef _WIN32 +#include +#include +#endif +#include "config.h" +#include "monitor/monitor.h" +#include "sysemu/sysemu.h" +#include "qemu/bitops.h" +#include "qemu/bitmap.h" +#include "hw/i386/pc.h" +#include "hw/pci/pci.h" +#include "hw/audio/audio.h" +#include "migration/migration.h" +#include "exec/address-spaces.h" +#include "migration/page_cache.h" +#include "qemu/config-file.h" +#include "qemu/error-report.h" +#include "qmp-commands.h" +#include "trace.h" +#include "exec/cpu-all.h" +#include "exec/ram_addr.h" +#include "qemu/host-utils.h" +#include "qemu/rcu_queue.h" + +#ifdef DEBUG_MIGRATION_RAM +#define DPRINTF(fmt, ...) \ + do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) \ + do { } while (0) +#endif + +static bool mig_throttle_on; +static int dirty_rate_high_cnt; +static void check_guest_throttling(void); + +static uint64_t bitmap_sync_count; + +/***********************************************************/ +/* ram save/restore */ + +#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */ +#define RAM_SAVE_FLAG_COMPRESS 0x02 +#define RAM_SAVE_FLAG_MEM_SIZE 0x04 +#define RAM_SAVE_FLAG_PAGE 0x08 +#define RAM_SAVE_FLAG_EOS 0x10 +#define RAM_SAVE_FLAG_CONTINUE 0x20 +#define RAM_SAVE_FLAG_XBZRLE 0x40 +/* 0x80 is reserved in migration.h start with 0x100 next */ + +static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE]; + +static inline bool is_zero_range(uint8_t *p, uint64_t size) +{ + return buffer_find_nonzero_offset(p, size) == size; +} + +/* struct contains XBZRLE cache and a static page + used by the compression */ +static struct { + /* buffer used for XBZRLE encoding */ + uint8_t *encoded_buf; + /* buffer for storing page content */ + uint8_t *current_buf; + /* Cache for XBZRLE, Protected by lock. */ + PageCache *cache; + QemuMutex lock; +} XBZRLE; + +/* buffer used for XBZRLE decoding */ +static uint8_t *xbzrle_decoded_buf; + +static void XBZRLE_cache_lock(void) +{ + if (migrate_use_xbzrle()) + qemu_mutex_lock(&XBZRLE.lock); +} + +static void XBZRLE_cache_unlock(void) +{ + if (migrate_use_xbzrle()) + qemu_mutex_unlock(&XBZRLE.lock); +} + +/* + * called from qmp_migrate_set_cache_size in main thread, possibly while + * a migration is in progress. + * A running migration maybe using the cache and might finish during this + * call, hence changes to the cache are protected by XBZRLE.lock(). + */ +int64_t xbzrle_cache_resize(int64_t new_size) +{ + PageCache *new_cache; + int64_t ret; + + if (new_size < TARGET_PAGE_SIZE) { + return -1; + } + + XBZRLE_cache_lock(); + + if (XBZRLE.cache != NULL) { + if (pow2floor(new_size) == migrate_xbzrle_cache_size()) { + goto out_new_size; + } + new_cache = cache_init(new_size / TARGET_PAGE_SIZE, + TARGET_PAGE_SIZE); + if (!new_cache) { + error_report("Error creating cache"); + ret = -1; + goto out; + } + + cache_fini(XBZRLE.cache); + XBZRLE.cache = new_cache; + } + +out_new_size: + ret = pow2floor(new_size); +out: + XBZRLE_cache_unlock(); + return ret; +} + +/* accounting for migration statistics */ +typedef struct AccountingInfo { + uint64_t dup_pages; + uint64_t skipped_pages; + uint64_t norm_pages; + uint64_t iterations; + uint64_t xbzrle_bytes; + uint64_t xbzrle_pages; + uint64_t xbzrle_cache_miss; + double xbzrle_cache_miss_rate; + uint64_t xbzrle_overflows; +} AccountingInfo; + +static AccountingInfo acct_info; + +static void acct_clear(void) +{ + memset(&acct_info, 0, sizeof(acct_info)); +} + +uint64_t dup_mig_bytes_transferred(void) +{ + return acct_info.dup_pages * TARGET_PAGE_SIZE; +} + +uint64_t dup_mig_pages_transferred(void) +{ + return acct_info.dup_pages; +} + +uint64_t skipped_mig_bytes_transferred(void) +{ + return acct_info.skipped_pages * TARGET_PAGE_SIZE; +} + +uint64_t skipped_mig_pages_transferred(void) +{ + return acct_info.skipped_pages; +} + +uint64_t norm_mig_bytes_transferred(void) +{ + return acct_info.norm_pages * TARGET_PAGE_SIZE; +} + +uint64_t norm_mig_pages_transferred(void) +{ + return acct_info.norm_pages; +} + +uint64_t xbzrle_mig_bytes_transferred(void) +{ + return acct_info.xbzrle_bytes; +} + +uint64_t xbzrle_mig_pages_transferred(void) +{ + return acct_info.xbzrle_pages; +} + +uint64_t xbzrle_mig_pages_cache_miss(void) +{ + return acct_info.xbzrle_cache_miss; +} + +double xbzrle_mig_cache_miss_rate(void) +{ + return acct_info.xbzrle_cache_miss_rate; +} + +uint64_t xbzrle_mig_pages_overflow(void) +{ + return acct_info.xbzrle_overflows; +} + +/* This is the last block that we have visited serching for dirty pages + */ +static RAMBlock *last_seen_block; +/* This is the last block from where we have sent data */ +static RAMBlock *last_sent_block; +static ram_addr_t last_offset; +static unsigned long *migration_bitmap; +static uint64_t migration_dirty_pages; +static uint32_t last_version; +static bool ram_bulk_stage; + +/** + * save_page_header: Write page header to wire + * + * If this is the 1st block, it also writes the block identification + * + * Returns: Number of bytes written + * + * @f: QEMUFile where to send the data + * @block: block that contains the page we want to send + * @offset: offset inside the block for the page + * in the lower bits, it contains flags + */ +static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset) +{ + size_t size; + + qemu_put_be64(f, offset); + size = 8; + + if (!(offset & RAM_SAVE_FLAG_CONTINUE)) { + qemu_put_byte(f, strlen(block->idstr)); + qemu_put_buffer(f, (uint8_t *)block->idstr, + strlen(block->idstr)); + size += 1 + strlen(block->idstr); + } + return size; +} + +/* Update the xbzrle cache to reflect a page that's been sent as all 0. + * The important thing is that a stale (not-yet-0'd) page be replaced + * by the new data. + * As a bonus, if the page wasn't in the cache it gets added so that + * when a small write is made into the 0'd page it gets XBZRLE sent + */ +static void xbzrle_cache_zero_page(ram_addr_t current_addr) +{ + if (ram_bulk_stage || !migrate_use_xbzrle()) { + return; + } + + /* We don't care if this fails to allocate a new cache page + * as long as it updated an old one */ + cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE, + bitmap_sync_count); +} + +#define ENCODING_FLAG_XBZRLE 0x1 + +/** + * save_xbzrle_page: compress and send current page + * + * Returns: 1 means that we wrote the page + * 0 means that page is identical to the one already sent + * -1 means that xbzrle would be longer than normal + * + * @f: QEMUFile where to send the data + * @current_data: + * @current_addr: + * @block: block that contains the page we want to send + * @offset: offset inside the block for the page + * @last_stage: if we are at the completion stage + * @bytes_transferred: increase it with the number of transferred bytes + */ +static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, + ram_addr_t current_addr, RAMBlock *block, + ram_addr_t offset, bool last_stage, + uint64_t *bytes_transferred) +{ + int encoded_len = 0, bytes_xbzrle; + uint8_t *prev_cached_page; + + if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) { + acct_info.xbzrle_cache_miss++; + if (!last_stage) { + if (cache_insert(XBZRLE.cache, current_addr, *current_data, + bitmap_sync_count) == -1) { + return -1; + } else { + /* update *current_data when the page has been + inserted into cache */ + *current_data = get_cached_data(XBZRLE.cache, current_addr); + } + } + return -1; + } + + prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); + + /* save current buffer into memory */ + memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE); + + /* XBZRLE encoding (if there is no overflow) */ + encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, + TARGET_PAGE_SIZE, XBZRLE.encoded_buf, + TARGET_PAGE_SIZE); + if (encoded_len == 0) { + DPRINTF("Skipping unmodified page\n"); + return 0; + } else if (encoded_len == -1) { + DPRINTF("Overflow\n"); + acct_info.xbzrle_overflows++; + /* update data in the cache */ + if (!last_stage) { + memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE); + *current_data = prev_cached_page; + } + return -1; + } + + /* we need to update the data in the cache, in order to get the same data */ + if (!last_stage) { + memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); + } + + /* Send XBZRLE based compressed page */ + bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE); + qemu_put_byte(f, ENCODING_FLAG_XBZRLE); + qemu_put_be16(f, encoded_len); + qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); + bytes_xbzrle += encoded_len + 1 + 2; + acct_info.xbzrle_pages++; + acct_info.xbzrle_bytes += bytes_xbzrle; + *bytes_transferred += bytes_xbzrle; + + return 1; +} + +static inline +ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, + ram_addr_t start) +{ + unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS; + unsigned long nr = base + (start >> TARGET_PAGE_BITS); + uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr)); + unsigned long size = base + (mr_size >> TARGET_PAGE_BITS); + + unsigned long next; + + if (ram_bulk_stage && nr > base) { + next = nr + 1; + } else { + next = find_next_bit(migration_bitmap, size, nr); + } + + if (next < size) { + clear_bit(next, migration_bitmap); + migration_dirty_pages--; + } + return (next - base) << TARGET_PAGE_BITS; +} + +static inline bool migration_bitmap_set_dirty(ram_addr_t addr) +{ + bool ret; + int nr = addr >> TARGET_PAGE_BITS; + + ret = test_and_set_bit(nr, migration_bitmap); + + if (!ret) { + migration_dirty_pages++; + } + return ret; +} + +static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length) +{ + ram_addr_t addr; + unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); + + /* start address is aligned at the start of a word? */ + if (((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) { + int k; + int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS); + unsigned long *src = ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION]; + + for (k = page; k < page + nr; k++) { + if (src[k]) { + unsigned long new_dirty; + new_dirty = ~migration_bitmap[k]; + migration_bitmap[k] |= src[k]; + new_dirty &= src[k]; + migration_dirty_pages += ctpopl(new_dirty); + src[k] = 0; + } + } + } else { + for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) { + if (cpu_physical_memory_get_dirty(start + addr, + TARGET_PAGE_SIZE, + DIRTY_MEMORY_MIGRATION)) { + cpu_physical_memory_reset_dirty(start + addr, + TARGET_PAGE_SIZE, + DIRTY_MEMORY_MIGRATION); + migration_bitmap_set_dirty(start + addr); + } + } + } +} + + +/* Fix me: there are too many global variables used in migration process. */ +static int64_t start_time; +static int64_t bytes_xfer_prev; +static int64_t num_dirty_pages_period; +static uint64_t xbzrle_cache_miss_prev; +static uint64_t iterations_prev; + +static void migration_bitmap_sync_init(void) +{ + start_time = 0; + bytes_xfer_prev = 0; + num_dirty_pages_period = 0; + xbzrle_cache_miss_prev = 0; + iterations_prev = 0; +} + +/* Called with iothread lock held, to protect ram_list.dirty_memory[] */ +static void migration_bitmap_sync(void) +{ + RAMBlock *block; + uint64_t num_dirty_pages_init = migration_dirty_pages; + MigrationState *s = migrate_get_current(); + int64_t end_time; + int64_t bytes_xfer_now; + + bitmap_sync_count++; + + if (!bytes_xfer_prev) { + bytes_xfer_prev = ram_bytes_transferred(); + } + + if (!start_time) { + start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + } + + trace_migration_bitmap_sync_start(); + address_space_sync_dirty_bitmap(&address_space_memory); + + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + migration_bitmap_sync_range(block->mr->ram_addr, block->used_length); + } + rcu_read_unlock(); + + trace_migration_bitmap_sync_end(migration_dirty_pages + - num_dirty_pages_init); + num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init; + end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + + /* more than 1 second = 1000 millisecons */ + if (end_time > start_time + 1000) { + if (migrate_auto_converge()) { + /* The following detection logic can be refined later. For now: + Check to see if the dirtied bytes is 50% more than the approx. + amount of bytes that just got transferred since the last time we + were in this routine. If that happens >N times (for now N==4) + we turn on the throttle down logic */ + bytes_xfer_now = ram_bytes_transferred(); + if (s->dirty_pages_rate && + (num_dirty_pages_period * TARGET_PAGE_SIZE > + (bytes_xfer_now - bytes_xfer_prev)/2) && + (dirty_rate_high_cnt++ > 4)) { + trace_migration_throttle(); + mig_throttle_on = true; + dirty_rate_high_cnt = 0; + } + bytes_xfer_prev = bytes_xfer_now; + } else { + mig_throttle_on = false; + } + if (migrate_use_xbzrle()) { + if (iterations_prev != acct_info.iterations) { + acct_info.xbzrle_cache_miss_rate = + (double)(acct_info.xbzrle_cache_miss - + xbzrle_cache_miss_prev) / + (acct_info.iterations - iterations_prev); + } + iterations_prev = acct_info.iterations; + xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss; + } + s->dirty_pages_rate = num_dirty_pages_period * 1000 + / (end_time - start_time); + s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE; + start_time = end_time; + num_dirty_pages_period = 0; + s->dirty_sync_count = bitmap_sync_count; + } +} + +/** + * ram_save_page: Send the given page to the stream + * + * Returns: Number of pages written. + * + * @f: QEMUFile where to send the data + * @block: block that contains the page we want to send + * @offset: offset inside the block for the page + * @last_stage: if we are at the completion stage + * @bytes_transferred: increase it with the number of transferred bytes + */ +static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, + bool last_stage, uint64_t *bytes_transferred) +{ + int pages = -1; + uint64_t bytes_xmit; + ram_addr_t current_addr; + MemoryRegion *mr = block->mr; + uint8_t *p; + int ret; + bool send_async = true; + + p = memory_region_get_ram_ptr(mr) + offset; + + /* In doubt sent page as normal */ + bytes_xmit = 0; + ret = ram_control_save_page(f, block->offset, + offset, TARGET_PAGE_SIZE, &bytes_xmit); + if (bytes_xmit) { + *bytes_transferred += bytes_xmit; + pages = 1; + } + + XBZRLE_cache_lock(); + + current_addr = block->offset + offset; + + if (block == last_sent_block) { + offset |= RAM_SAVE_FLAG_CONTINUE; + } + if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { + if (ret != RAM_SAVE_CONTROL_DELAYED) { + if (bytes_xmit > 0) { + acct_info.norm_pages++; + } else if (bytes_xmit == 0) { + acct_info.dup_pages++; + } + } + } else if (is_zero_range(p, TARGET_PAGE_SIZE)) { + acct_info.dup_pages++; + *bytes_transferred += save_page_header(f, block, + offset | RAM_SAVE_FLAG_COMPRESS); + qemu_put_byte(f, 0); + *bytes_transferred += 1; + pages = 1; + /* Must let xbzrle know, otherwise a previous (now 0'd) cached + * page would be stale + */ + xbzrle_cache_zero_page(current_addr); + } else if (!ram_bulk_stage && migrate_use_xbzrle()) { + pages = save_xbzrle_page(f, &p, current_addr, block, + offset, last_stage, bytes_transferred); + if (!last_stage) { + /* Can't send this cached data async, since the cache page + * might get updated before it gets to the wire + */ + send_async = false; + } + } + + /* XBZRLE overflow or normal page */ + if (pages == -1) { + *bytes_transferred += save_page_header(f, block, + offset | RAM_SAVE_FLAG_PAGE); + if (send_async) { + qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE); + } else { + qemu_put_buffer(f, p, TARGET_PAGE_SIZE); + } + *bytes_transferred += TARGET_PAGE_SIZE; + pages = 1; + acct_info.norm_pages++; + } + + XBZRLE_cache_unlock(); + + return pages; +} + +/** + * ram_find_and_save_block: Finds a dirty page and sends it to f + * + * Called within an RCU critical section. + * + * Returns: The number of pages written + * 0 means no dirty pages + * + * @f: QEMUFile where to send the data + * @last_stage: if we are at the completion stage + * @bytes_transferred: increase it with the number of transferred bytes + */ + +static int ram_find_and_save_block(QEMUFile *f, bool last_stage, + uint64_t *bytes_transferred) +{ + RAMBlock *block = last_seen_block; + ram_addr_t offset = last_offset; + bool complete_round = false; + int pages = 0; + MemoryRegion *mr; + + if (!block) + block = QLIST_FIRST_RCU(&ram_list.blocks); + + while (true) { + mr = block->mr; + offset = migration_bitmap_find_and_reset_dirty(mr, offset); + if (complete_round && block == last_seen_block && + offset >= last_offset) { + break; + } + if (offset >= block->used_length) { + offset = 0; + block = QLIST_NEXT_RCU(block, next); + if (!block) { + block = QLIST_FIRST_RCU(&ram_list.blocks); + complete_round = true; + ram_bulk_stage = false; + } + } else { + pages = ram_save_page(f, block, offset, last_stage, + bytes_transferred); + + /* if page is unmodified, continue to the next */ + if (pages > 0) { + last_sent_block = block; + break; + } + } + } + + last_seen_block = block; + last_offset = offset; + + return pages; +} + +static uint64_t bytes_transferred; + +void acct_update_position(QEMUFile *f, size_t size, bool zero) +{ + uint64_t pages = size / TARGET_PAGE_SIZE; + if (zero) { + acct_info.dup_pages += pages; + } else { + acct_info.norm_pages += pages; + bytes_transferred += size; + qemu_update_position(f, size); + } +} + +static ram_addr_t ram_save_remaining(void) +{ + return migration_dirty_pages; +} + +uint64_t ram_bytes_remaining(void) +{ + return ram_save_remaining() * TARGET_PAGE_SIZE; +} + +uint64_t ram_bytes_transferred(void) +{ + return bytes_transferred; +} + +uint64_t ram_bytes_total(void) +{ + RAMBlock *block; + uint64_t total = 0; + + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) + total += block->used_length; + rcu_read_unlock(); + return total; +} + +void free_xbzrle_decoded_buf(void) +{ + g_free(xbzrle_decoded_buf); + xbzrle_decoded_buf = NULL; +} + +static void migration_end(void) +{ + if (migration_bitmap) { + memory_global_dirty_log_stop(); + g_free(migration_bitmap); + migration_bitmap = NULL; + } + + XBZRLE_cache_lock(); + if (XBZRLE.cache) { + cache_fini(XBZRLE.cache); + g_free(XBZRLE.encoded_buf); + g_free(XBZRLE.current_buf); + XBZRLE.cache = NULL; + XBZRLE.encoded_buf = NULL; + XBZRLE.current_buf = NULL; + } + XBZRLE_cache_unlock(); +} + +static void ram_migration_cancel(void *opaque) +{ + migration_end(); +} + +static void reset_ram_globals(void) +{ + last_seen_block = NULL; + last_sent_block = NULL; + last_offset = 0; + last_version = ram_list.version; + ram_bulk_stage = true; +} + +#define MAX_WAIT 50 /* ms, half buffered_file limit */ + + +/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has + * long-running RCU critical section. When rcu-reclaims in the code + * start to become numerous it will be necessary to reduce the + * granularity of these critical sections. + */ + +static int ram_save_setup(QEMUFile *f, void *opaque) +{ + RAMBlock *block; + int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */ + + mig_throttle_on = false; + dirty_rate_high_cnt = 0; + bitmap_sync_count = 0; + migration_bitmap_sync_init(); + + if (migrate_use_xbzrle()) { + XBZRLE_cache_lock(); + XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / + TARGET_PAGE_SIZE, + TARGET_PAGE_SIZE); + if (!XBZRLE.cache) { + XBZRLE_cache_unlock(); + error_report("Error creating cache"); + return -1; + } + XBZRLE_cache_unlock(); + + /* We prefer not to abort if there is no memory */ + XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE); + if (!XBZRLE.encoded_buf) { + error_report("Error allocating encoded_buf"); + return -1; + } + + XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE); + if (!XBZRLE.current_buf) { + error_report("Error allocating current_buf"); + g_free(XBZRLE.encoded_buf); + XBZRLE.encoded_buf = NULL; + return -1; + } + + acct_clear(); + } + + /* iothread lock needed for ram_list.dirty_memory[] */ + qemu_mutex_lock_iothread(); + qemu_mutex_lock_ramlist(); + rcu_read_lock(); + bytes_transferred = 0; + reset_ram_globals(); + + ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS; + migration_bitmap = bitmap_new(ram_bitmap_pages); + bitmap_set(migration_bitmap, 0, ram_bitmap_pages); + + /* + * Count the total number of pages used by ram blocks not including any + * gaps due to alignment or unplugs. + */ + migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; + + memory_global_dirty_log_start(); + migration_bitmap_sync(); + qemu_mutex_unlock_ramlist(); + qemu_mutex_unlock_iothread(); + + qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); + + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + qemu_put_byte(f, strlen(block->idstr)); + qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); + qemu_put_be64(f, block->used_length); + } + + rcu_read_unlock(); + + ram_control_before_iterate(f, RAM_CONTROL_SETUP); + ram_control_after_iterate(f, RAM_CONTROL_SETUP); + + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + + return 0; +} + +static int ram_save_iterate(QEMUFile *f, void *opaque) +{ + int ret; + int i; + int64_t t0; + int pages_sent = 0; + + rcu_read_lock(); + if (ram_list.version != last_version) { + reset_ram_globals(); + } + + /* Read version before ram_list.blocks */ + smp_rmb(); + + ram_control_before_iterate(f, RAM_CONTROL_ROUND); + + t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + i = 0; + while ((ret = qemu_file_rate_limit(f)) == 0) { + int pages; + + pages = ram_find_and_save_block(f, false, &bytes_transferred); + /* no more pages to sent */ + if (pages == 0) { + break; + } + pages_sent += pages; + acct_info.iterations++; + check_guest_throttling(); + /* we want to check in the 1st loop, just in case it was the 1st time + and we had to sync the dirty bitmap. + qemu_get_clock_ns() is a bit expensive, so we only check each some + iterations + */ + if ((i & 63) == 0) { + uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000; + if (t1 > MAX_WAIT) { + DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n", + t1, i); + break; + } + } + i++; + } + rcu_read_unlock(); + + /* + * Must occur before EOS (or any QEMUFile operation) + * because of RDMA protocol. + */ + ram_control_after_iterate(f, RAM_CONTROL_ROUND); + + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + bytes_transferred += 8; + + ret = qemu_file_get_error(f); + if (ret < 0) { + return ret; + } + + return pages_sent; +} + +/* Called with iothread lock */ +static int ram_save_complete(QEMUFile *f, void *opaque) +{ + rcu_read_lock(); + + migration_bitmap_sync(); + + ram_control_before_iterate(f, RAM_CONTROL_FINISH); + + /* try transferring iterative blocks of memory */ + + /* flush all remaining blocks regardless of rate limiting */ + while (true) { + int pages; + + pages = ram_find_and_save_block(f, true, &bytes_transferred); + /* no more blocks to sent */ + if (pages == 0) { + break; + } + } + + ram_control_after_iterate(f, RAM_CONTROL_FINISH); + migration_end(); + + rcu_read_unlock(); + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + + return 0; +} + +static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) +{ + uint64_t remaining_size; + + remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; + + if (remaining_size < max_size) { + qemu_mutex_lock_iothread(); + rcu_read_lock(); + migration_bitmap_sync(); + rcu_read_unlock(); + qemu_mutex_unlock_iothread(); + remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; + } + return remaining_size; +} + +static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) +{ + unsigned int xh_len; + int xh_flags; + + if (!xbzrle_decoded_buf) { + xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE); + } + + /* extract RLE header */ + xh_flags = qemu_get_byte(f); + xh_len = qemu_get_be16(f); + + if (xh_flags != ENCODING_FLAG_XBZRLE) { + error_report("Failed to load XBZRLE page - wrong compression!"); + return -1; + } + + if (xh_len > TARGET_PAGE_SIZE) { + error_report("Failed to load XBZRLE page - len overflow!"); + return -1; + } + /* load data and decode */ + qemu_get_buffer(f, xbzrle_decoded_buf, xh_len); + + /* decode RLE */ + if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host, + TARGET_PAGE_SIZE) == -1) { + error_report("Failed to load XBZRLE page - decode error!"); + return -1; + } + + return 0; +} + +/* Must be called from within a rcu critical section. + * Returns a pointer from within the RCU-protected ram_list. + */ +static inline void *host_from_stream_offset(QEMUFile *f, + ram_addr_t offset, + int flags) +{ + static RAMBlock *block = NULL; + char id[256]; + uint8_t len; + + if (flags & RAM_SAVE_FLAG_CONTINUE) { + if (!block || block->max_length <= offset) { + error_report("Ack, bad migration stream!"); + return NULL; + } + + return memory_region_get_ram_ptr(block->mr) + offset; + } + + len = qemu_get_byte(f); + qemu_get_buffer(f, (uint8_t *)id, len); + id[len] = 0; + + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + if (!strncmp(id, block->idstr, sizeof(id)) && + block->max_length > offset) { + return memory_region_get_ram_ptr(block->mr) + offset; + } + } + + error_report("Can't find block %s!", id); + return NULL; +} + +/* + * If a page (or a whole RDMA chunk) has been + * determined to be zero, then zap it. + */ +void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) +{ + if (ch != 0 || !is_zero_range(host, size)) { + memset(host, ch, size); + } +} + +static int ram_load(QEMUFile *f, void *opaque, int version_id) +{ + int flags = 0, ret = 0; + static uint64_t seq_iter; + + seq_iter++; + + if (version_id != 4) { + ret = -EINVAL; + } + + /* This RCU critical section can be very long running. + * When RCU reclaims in the code start to become numerous, + * it will be necessary to reduce the granularity of this + * critical section. + */ + rcu_read_lock(); + while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { + ram_addr_t addr, total_ram_bytes; + void *host; + uint8_t ch; + + addr = qemu_get_be64(f); + flags = addr & ~TARGET_PAGE_MASK; + addr &= TARGET_PAGE_MASK; + + switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { + case RAM_SAVE_FLAG_MEM_SIZE: + /* Synchronize RAM block list */ + total_ram_bytes = addr; + while (!ret && total_ram_bytes) { + RAMBlock *block; + uint8_t len; + char id[256]; + ram_addr_t length; + + len = qemu_get_byte(f); + qemu_get_buffer(f, (uint8_t *)id, len); + id[len] = 0; + length = qemu_get_be64(f); + + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + if (!strncmp(id, block->idstr, sizeof(id))) { + if (length != block->used_length) { + Error *local_err = NULL; + + ret = qemu_ram_resize(block->offset, length, &local_err); + if (local_err) { + error_report_err(local_err); + } + } + ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, + block->idstr); + break; + } + } + + if (!block) { + error_report("Unknown ramblock \"%s\", cannot " + "accept migration", id); + ret = -EINVAL; + } + + total_ram_bytes -= length; + } + break; + case RAM_SAVE_FLAG_COMPRESS: + host = host_from_stream_offset(f, addr, flags); + if (!host) { + error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); + ret = -EINVAL; + break; + } + ch = qemu_get_byte(f); + ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); + break; + case RAM_SAVE_FLAG_PAGE: + host = host_from_stream_offset(f, addr, flags); + if (!host) { + error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); + ret = -EINVAL; + break; + } + qemu_get_buffer(f, host, TARGET_PAGE_SIZE); + break; + case RAM_SAVE_FLAG_XBZRLE: + host = host_from_stream_offset(f, addr, flags); + if (!host) { + error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); + ret = -EINVAL; + break; + } + if (load_xbzrle(f, addr, host) < 0) { + error_report("Failed to decompress XBZRLE page at " + RAM_ADDR_FMT, addr); + ret = -EINVAL; + break; + } + break; + case RAM_SAVE_FLAG_EOS: + /* normal exit */ + break; + default: + if (flags & RAM_SAVE_FLAG_HOOK) { + ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL); + } else { + error_report("Unknown combination of migration flags: %#x", + flags); + ret = -EINVAL; + } + } + if (!ret) { + ret = qemu_file_get_error(f); + } + } + + rcu_read_unlock(); + DPRINTF("Completed load of VM with exit code %d seq iteration " + "%" PRIu64 "\n", ret, seq_iter); + return ret; +} + +static SaveVMHandlers savevm_ram_handlers = { + .save_live_setup = ram_save_setup, + .save_live_iterate = ram_save_iterate, + .save_live_complete = ram_save_complete, + .save_live_pending = ram_save_pending, + .load_state = ram_load, + .cancel = ram_migration_cancel, +}; + +void ram_mig_init(void) +{ + qemu_mutex_init(&XBZRLE.lock); + register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL); +} + +/* Stub function that's gets run on the vcpu when its brought out of the + VM to run inside qemu via async_run_on_cpu()*/ +static void mig_sleep_cpu(void *opq) +{ + qemu_mutex_unlock_iothread(); + g_usleep(30*1000); + qemu_mutex_lock_iothread(); +} + +/* To reduce the dirty rate explicitly disallow the VCPUs from spending + much time in the VM. The migration thread will try to catchup. + Workload will experience a performance drop. +*/ +static void mig_throttle_guest_down(void) +{ + CPUState *cpu; + + qemu_mutex_lock_iothread(); + CPU_FOREACH(cpu) { + async_run_on_cpu(cpu, mig_sleep_cpu, NULL); + } + qemu_mutex_unlock_iothread(); +} + +static void check_guest_throttling(void) +{ + static int64_t t0; + int64_t t1; + + if (!mig_throttle_on) { + return; + } + + if (!t0) { + t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + return; + } + + t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + + /* If it has been more than 40 ms since the last time the guest + * was throttled then do it again. + */ + if (40 < (t1-t0)/1000000) { + mig_throttle_guest_down(); + t0 = t1; + } +} diff --git a/trace-events b/trace-events index 482dc01..450022d 100644 --- a/trace-events +++ b/trace-events @@ -1193,7 +1193,7 @@ vmstate_subsection_load_good(const char *parent) "%s" # qemu-file.c qemu_file_fclose(void) "" -# arch_init.c +# migration/ram.c migration_bitmap_sync_start(void) "" migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64"" migration_throttle(void) "" -- 1.8.3.1