[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH] exec: limit system memory size
From: |
Paolo Bonzini |
Subject: |
Re: [Qemu-devel] [PATCH] exec: limit system memory size |
Date: |
Mon, 04 Nov 2013 11:54:34 +0100 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130923 Thunderbird/17.0.9 |
Il 04/11/2013 11:07, Michael S. Tsirkin ha scritto:
> On Mon, Nov 04, 2013 at 11:50:05AM +0200, Marcel Apfelbaum wrote:
>> On Mon, 2013-11-04 at 08:06 +0200, Michael S. Tsirkin wrote:
>>> The page table logic in exec.c assumes
>>> that memory addresses are at most TARGET_PHYS_ADDR_SPACE_BITS.
>>>
>>> But pci addresses are full 64 bit so if we try to render them ignoring
>>> the extra bits, we get strange effects with sections overlapping each
>>> other.
>>>
>>> To fix, simply limit the system memory size to
>>> 1 << TARGET_PHYS_ADDR_SPACE_BITS,
>>> pci addresses will be rendered within that.
>>>
>>> Signed-off-by: Michael S. Tsirkin <address@hidden>
>>> ---
>>> exec.c | 6 +++++-
>>> 1 file changed, 5 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/exec.c b/exec.c
>>> index 030118e..c7a8df5 100644
>>> --- a/exec.c
>>> +++ b/exec.c
>>> @@ -1801,7 +1801,12 @@ void address_space_destroy_dispatch(AddressSpace *as)
>>> static void memory_map_init(void)
>>> {
>>> system_memory = g_malloc(sizeof(*system_memory));
>>> - memory_region_init(system_memory, NULL, "system", INT64_MAX);
>>> +
>>> + assert(TARGET_PHYS_ADDR_SPACE_BITS <= 64);
>>> +
>>> + memory_region_init(system_memory, NULL, "system",
>>> + TARGET_PHYS_ADDR_SPACE_BITS == 64 ?
>>> + UINT64_MAX : (0x1ULL <<
>>> TARGET_PHYS_ADDR_SPACE_BITS));
>>
>> Michael, thanks again for the help.
>>
>> I am concerned that we cannot use all the UINT64_MAX
>> address space.
>
> Well, exec isn't ready for this, it expects at most
> TARGET_PHYS_ADDR_SPACE_BITS.
> Fortunately there's no way for CPU to initiate io outside
> this area.
>
> So this is another place where device to device IO
> would be broken.
However, firmware that places BARs where the CPU cannot access them
would be "interesting" to say the least. This applies both to device-
device I/O and to non-aligned <4K BARs.
This patch looks good; however, on top of it can you test
kvm-unit-tests with TARGET_PHYS_ADDR_SPACE_BITS=64 and see whether
there is a measurable slowdown (in the inl_from_qemu tests)? If not,
we can just get rid of TARGET_PHYS_ADDR_SPACE_BITS in exec.c.
Note that L2_BITS is shared between translate-all.c and exec.c only for
historical reasons (the translate-all.c code used to be in exec.c). It
is probably a good idea to split them like this:
diff --git a/exec.c b/exec.c
index 2e31ffc..3faea0e 100644
--- a/exec.c
+++ b/exec.c
@@ -88,7 +88,15 @@ struct PhysPageEntry {
uint16_t ptr : 15;
};
-typedef PhysPageEntry Node[L2_SIZE];
+/* Size of the L2 (and L3, etc) page tables. */
+#define ADDR_SPACE_BITS 64
+
+#define P_L2_BITS 10
+#define P_L2_SIZE (1 << P_L2_BITS)
+
+#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) +
1)
+
+typedef PhysPageEntry Node[P_L2_SIZE];
struct AddressSpaceDispatch {
/* This is a multi-level map on the physical address space.
@@ -155,7 +163,7 @@ static uint16_t phys_map_node_alloc(void)
ret = next_map.nodes_nb++;
assert(ret != PHYS_MAP_NODE_NIL);
assert(ret != next_map.nodes_nb_alloc);
- for (i = 0; i < L2_SIZE; ++i) {
+ for (i = 0; i < P_L2_SIZE; ++i) {
next_map.nodes[ret][i].is_leaf = 0;
next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
}
@@ -168,13 +176,13 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr
*index,
{
PhysPageEntry *p;
int i;
- hwaddr step = (hwaddr)1 << (level * L2_BITS);
+ hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
lp->ptr = phys_map_node_alloc();
p = next_map.nodes[lp->ptr];
if (level == 0) {
- for (i = 0; i < L2_SIZE; i++) {
+ for (i = 0; i < P_L2_SIZE; i++) {
p[i].is_leaf = 1;
p[i].ptr = PHYS_SECTION_UNASSIGNED;
}
@@ -182,9 +190,9 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr
*index,
} else {
p = next_map.nodes[lp->ptr];
}
- lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
+ lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
- while (*nb && lp < &p[L2_SIZE]) {
+ while (*nb && lp < &p[P_L2_SIZE]) {
if ((*index & (step - 1)) == 0 && *nb >= step) {
lp->is_leaf = true;
lp->ptr = leaf;
@@ -218,7 +226,7 @@ static MemoryRegionSection *phys_page_find(PhysPageEntry
lp, hwaddr index,
return §ions[PHYS_SECTION_UNASSIGNED];
}
p = nodes[lp.ptr];
- lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
+ lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
}
return §ions[lp.ptr];
}
diff --git a/translate-all.c b/translate-all.c
index aeda54d..1c63d78 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -96,12 +96,16 @@ typedef struct PageDesc {
# define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
#endif
+/* Size of the L2 (and L3, etc) page tables. */
+#define V_L2_BITS 10
+#define V_L2_SIZE (1 << V_L2_BITS)
+
/* The bits remaining after N lower levels of page tables. */
#define V_L1_BITS_REM \
- ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
+ ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS)
#if V_L1_BITS_REM < 4
-#define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
+#define V_L1_BITS (V_L1_BITS_REM + V_L2_BITS)
#else
#define V_L1_BITS V_L1_BITS_REM
#endif
@@ -395,18 +399,18 @@ static PageDesc *page_find_alloc(tb_page_addr_t index,
int alloc)
lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
/* Level 2..N-1. */
- for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
+ for (i = V_L1_SHIFT / V_L2_BITS - 1; i > 0; i--) {
void **p = *lp;
if (p == NULL) {
if (!alloc) {
return NULL;
}
- ALLOC(p, sizeof(void *) * L2_SIZE);
+ ALLOC(p, sizeof(void *) * V_L2_SIZE);
*lp = p;
}
- lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
+ lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
}
pd = *lp;
@@ -414,13 +418,13 @@ static PageDesc *page_find_alloc(tb_page_addr_t index,
int alloc)
if (!alloc) {
return NULL;
}
- ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
+ ALLOC(pd, sizeof(PageDesc) * V_L2_SIZE);
*lp = pd;
}
#undef ALLOC
- return pd + (index & (L2_SIZE - 1));
+ return pd + (index & (V_L2_SIZE - 1));
}
static inline PageDesc *page_find(tb_page_addr_t index)
@@ -655,14 +659,14 @@ static void page_flush_tb_1(int level, void **lp)
if (level == 0) {
PageDesc *pd = *lp;
- for (i = 0; i < L2_SIZE; ++i) {
+ for (i = 0; i < V_L2_SIZE; ++i) {
pd[i].first_tb = NULL;
invalidate_page_bitmap(pd + i);
}
} else {
void **pp = *lp;
- for (i = 0; i < L2_SIZE; ++i) {
+ for (i = 0; i < V_L2_SIZE; ++i) {
page_flush_tb_1(level - 1, pp + i);
}
}
@@ -673,7 +677,7 @@ static void page_flush_tb(void)
int i;
for (i = 0; i < V_L1_SIZE; i++) {
- page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
+ page_flush_tb_1(V_L1_SHIFT / V_L2_BITS - 1, l1_map + i);
}
}
@@ -1600,7 +1604,7 @@ static int walk_memory_regions_1(struct
walk_memory_regions_data *data,
if (level == 0) {
PageDesc *pd = *lp;
- for (i = 0; i < L2_SIZE; ++i) {
+ for (i = 0; i < V_L2_SIZE; ++i) {
int prot = pd[i].flags;
pa = base | (i << TARGET_PAGE_BITS);
@@ -1614,9 +1618,9 @@ static int walk_memory_regions_1(struct
walk_memory_regions_data *data,
} else {
void **pp = *lp;
- for (i = 0; i < L2_SIZE; ++i) {
+ for (i = 0; i < V_L2_SIZE; ++i) {
pa = base | ((abi_ulong)i <<
- (TARGET_PAGE_BITS + L2_BITS * level));
+ (TARGET_PAGE_BITS + V_L2_BITS * level));
rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
if (rc != 0) {
return rc;
@@ -1639,7 +1643,7 @@ int walk_memory_regions(void *priv,
walk_memory_regions_fn fn)
for (i = 0; i < V_L1_SIZE; i++) {
int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
- V_L1_SHIFT / L2_BITS - 1, l1_map + i);
+ V_L1_SHIFT / V_L2_BITS - 1, l1_map + i);
if (rc != 0) {
return rc;
diff --git a/translate-all.h b/translate-all.h
index 5c38819..f7e5932 100644
--- a/translate-all.h
+++ b/translate-all.h
@@ -19,13 +19,6 @@
#ifndef TRANSLATE_ALL_H
#define TRANSLATE_ALL_H
-/* Size of the L2 (and L3, etc) page tables. */
-#define L2_BITS 10
-#define L2_SIZE (1 << L2_BITS)
-
-#define P_L2_LEVELS \
- (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
-
/* translate-all.c */
void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len);
void cpu_unlink_tb(CPUState *cpu);
Paolo
Re: [Qemu-devel] [PATCH] exec: limit system memory size, Paolo Bonzini, 2013/11/04