diff --git a/i386/i386/db_interface.c b/i386/i386/db_interface.c index d149adc..ebc9a2a 100644 --- a/i386/i386/db_interface.c +++ b/i386/i386/db_interface.c @@ -280,7 +280,7 @@ db_user_to_kernel_address( err = vm_fault( task->map, trunc_page(addr), VM_PROT_READ, - FALSE, FALSE, 0); + FALSE); if (err == KERN_SUCCESS) goto retry; } diff --git a/i386/i386/kttd_interface.c b/i386/i386/kttd_interface.c index b9e0624..12f7e87 100644 --- a/i386/i386/kttd_interface.c +++ b/i386/i386/kttd_interface.c @@ -238,8 +238,7 @@ boolean_t kttd_mem_access(vm_offset_t offset, vm_prot_t access) if (kttd_debug) printf(">>>>>>>>>>Faulting in memory: 0x%x, 0x%x\n", trunc_page(offset), access); - code = vm_fault(kernel_map, trunc_page(offset), access, FALSE, - FALSE, (void (*)()) 0); + code = vm_fault(kernel_map, trunc_page(offset), access, FALSE); }else{ /* * Check for user thread @@ -249,8 +248,7 @@ boolean_t kttd_mem_access(vm_offset_t offset, vm_prot_t access) (current_thread()->task->map->pmap != kernel_pmap) && (current_thread()->task->map->pmap != PMAP_NULL)) { code = vm_fault(current_thread()->task->map, - trunc_page(offset), access, FALSE, - FALSE, (void (*)()) 0); + trunc_page(offset), access, FALSE); }else{ /* * Invalid kernel address (below VM_MIN_KERNEL_ADDRESS) diff --git a/i386/i386/trap.c b/i386/i386/trap.c index 28a9e0c..d7db354 100644 --- a/i386/i386/trap.c +++ b/i386/i386/trap.c @@ -261,9 +261,7 @@ dump_ss(regs); result = vm_fault(map, trunc_page((vm_offset_t)subcode), VM_PROT_READ|VM_PROT_WRITE, - FALSE, - FALSE, - (void (*)()) 0); + FALSE); #if MACH_KDB if (result == KERN_SUCCESS) { /* Look for watchpoints */ @@ -520,9 +518,7 @@ printf("user trap %d error %d sub %08x\n", type, code, subcode); (regs->err & T_PF_WRITE) ? VM_PROT_READ|VM_PROT_WRITE : VM_PROT_READ, - FALSE, - FALSE, - user_page_fault_continue); + FALSE); /*NOTREACHED*/ break; diff --git a/i386/intel/read_fault.c b/i386/intel/read_fault.c index 762f60d..efa4fe9 100644 --- a/i386/intel/read_fault.c +++ b/i386/intel/read_fault.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -52,6 +53,8 @@ intel_read_fault(map, vaddr) vm_page_t top_page; /* Placeholder page */ boolean_t wired; /* Is map region wired? */ kern_return_t result; + vm_behavior_t behavior; + vm_offset_t lo_offset, hi_offset; register vm_page_t m; RetryFault: @@ -61,7 +64,8 @@ intel_read_fault(map, vaddr) * to begin search. 
*/ result = vm_map_lookup(&map, vaddr, VM_PROT_READ, &version, - &object, &offset, &prot, &wired); + &object, &offset, &prot, &wired, + &behavior, &lo_offset, &hi_offset); if (result != KERN_SUCCESS) return (result); @@ -74,8 +78,8 @@ intel_read_fault(map, vaddr) vm_object_paging_begin(object); result = vm_fault_page(object, offset, VM_PROT_READ, FALSE, TRUE, - &prot, &result_page, &top_page, - FALSE, (void (*)()) 0); + lo_offset, hi_offset, behavior, + &prot, &result_page, &top_page); if (result != VM_FAULT_SUCCESS) { vm_object_deallocate(object); @@ -134,7 +138,7 @@ intel_read_fault(map, vaddr) result = vm_map_lookup(&map, vaddr, VM_PROT_READ, &version, &retry_object, &retry_offset, &retry_prot, - &wired); + &wired, &behavior, &lo_offset, &hi_offset); if (result != KERN_SUCCESS) { vm_object_lock(m->object); RELEASE_PAGE(m); diff --git a/include/mach/mach.defs b/include/mach/mach.defs index 4531a22..23f97b4 100644 --- a/include/mach/mach.defs +++ b/include/mach/mach.defs @@ -306,7 +306,20 @@ routine mach_ports_lookup( out init_port_set : mach_port_array_t = ^array[] of mach_port_t); -skip; /* old u*x_pid */ +/* + * Set the paging behavior attribute for the specified range + * of the virtual address space of the target task. + * The behavior value is one of {default, random, forward + * sequential, reverse sequential} and indicates the expected + * page reference pattern for the specified range. + */ +/* was old u*x_pid */ +routine vm_behavior_set( + target_task : vm_task_t; + address : vm_address_t; + size : vm_size_t; + new_behavior : vm_behavior_t); + skip; /* old netipc_listen */ skip; /* old netipc_ignore */ @@ -349,7 +362,8 @@ routine memory_object_get_attributes( memory_control : memory_object_control_t; out object_ready : boolean_t; out may_cache : boolean_t; - out copy_strategy : memory_object_copy_strategy_t); + out copy_strategy : memory_object_copy_strategy_t; + out cluster_size : vm_size_t); /* * Sets the default memory manager, the port to which @@ -693,7 +707,8 @@ simpleroutine memory_object_set_attributes( memory_control : memory_object_control_t; object_ready : boolean_t; may_cache : boolean_t; - copy_strategy : memory_object_copy_strategy_t); + copy_strategy : memory_object_copy_strategy_t; + cluster_size : vm_size_t); /* */ @@ -735,12 +750,14 @@ simpleroutine memory_object_data_supply( simpleroutine memory_object_ready( memory_control : memory_object_control_t; may_cache : boolean_t; - copy_strategy : memory_object_copy_strategy_t); + copy_strategy : memory_object_copy_strategy_t; + cluster_size : vm_size_t); simpleroutine memory_object_change_attributes( memory_control : memory_object_control_t; may_cache : boolean_t; copy_strategy : memory_object_copy_strategy_t; + cluster_size : vm_size_t; reply_to : mach_port_t = MACH_MSG_TYPE_MAKE_SEND_ONCE|polymorphic); diff --git a/include/mach/mach_types.defs b/include/mach/mach_types.defs index 4e448b8..211883f 100644 --- a/include/mach/mach_types.defs +++ b/include/mach/mach_types.defs @@ -118,6 +118,7 @@ type vm_size_t = natural_t; type vm_prot_t = int; type vm_inherit_t = int; type vm_statistics_data_t = struct[13] of integer_t; +type vm_behavior_t = int; type vm_machine_attribute_t = int; type vm_machine_attribute_val_t = int; diff --git a/include/mach/memory_object.defs b/include/mach/memory_object.defs index ea7989a..8002ba1 100644 --- a/include/mach/memory_object.defs +++ b/include/mach/memory_object.defs @@ -303,4 +303,5 @@ simpleroutine memory_object_change_completed( msgseqno seqno : mach_port_seqno_t; #endif /* SEQNOS 
*/ may_cache : boolean_t; - copy_strategy : memory_object_copy_strategy_t); + copy_strategy : memory_object_copy_strategy_t; + cluster_size : vm_size_t); diff --git a/include/mach/syscall_sw.h b/include/mach/syscall_sw.h index af14c8d..cec712c 100644 --- a/include/mach/syscall_sw.h +++ b/include/mach/syscall_sw.h @@ -82,6 +82,7 @@ kernel_trap(nw_select,-95,3) kernel_trap(syscall_vm_map,-64,11) kernel_trap(syscall_vm_allocate,-65,4) kernel_trap(syscall_vm_deallocate,-66,3) +kernel_trap(syscall_vm_behavior_set,-67,4) kernel_trap(syscall_task_create,-68,3) kernel_trap(syscall_task_terminate,-69,1) diff --git a/include/mach/vm_behavior.h b/include/mach/vm_behavior.h new file mode 100644 index 0000000..d9154a0 --- /dev/null +++ b/include/mach/vm_behavior.h @@ -0,0 +1,55 @@ +/* + * Copyright 1991-1998 by Open Software Foundation, Inc. + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby granted, + * provided that the above copyright notice appears in all copies and + * that both the copyright notice and this permission notice appear in + * supporting documentation. + * + * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE. + * + * IN NO EVENT SHALL OSF BE LIABLE FOR ANY SPECIAL, INDIRECT, OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT, + * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * MkLinux + */ +/* + * File: mach/vm_behavior.h + * + * Virtual memory map behavior definitions. + * + */ + +#ifndef _MACH_VM_BEHAVIOR_H_ +#define _MACH_VM_BEHAVIOR_H_ + +/* + * Types defined: + * + * vm_behavior_t behavior codes. + */ + +typedef int vm_behavior_t; + +/* + * Enumeration of valid values for vm_behavior_t. + * These describe expected page reference behavior for + * for a given range of virtual memory. For implementation + * details see vm/vm_fault.c + */ + + +#define VM_BEHAVIOR_DEFAULT ((vm_behavior_t) 0) /* default */ +#define VM_BEHAVIOR_RANDOM ((vm_behavior_t) 1) /* random */ +#define VM_BEHAVIOR_SEQUENTIAL ((vm_behavior_t) 2) /* forward sequential */ +#define VM_BEHAVIOR_RSEQNTL ((vm_behavior_t) 3) /* reverse sequential */ + +#endif /*_MACH_VM_BEHAVIOR_H_*/ diff --git a/kern/bootstrap.c b/kern/bootstrap.c index c07b032..6a01596 100644 --- a/kern/bootstrap.c +++ b/kern/bootstrap.c @@ -504,7 +504,7 @@ static void copy_bootstrap(void *e, exec_info_t *boot_exec_info) load_protect_text ? 
VM_PROT_READ|VM_PROT_EXECUTE : VM_PROT_READ|VM_PROT_EXECUTE | VM_PROT_WRITE, - 0,0,0); + 0); i = round_page (i+1); } } diff --git a/kern/ipc_mig.c b/kern/ipc_mig.c index 3f55da7..7a990cc 100644 --- a/kern/ipc_mig.c +++ b/kern/ipc_mig.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -1018,3 +1019,27 @@ syscall_device_writev_request(mach_port_t device_name, device_deallocate(dev); return res; } + +kern_return_t +syscall_vm_behavior_set( + mach_port_t target_map, + vm_offset_t address, + vm_size_t size, + vm_behavior_t new_behavior) +{ + kern_return_t error = KERN_SUCCESS; + vm_map_t map; + + /* + * Translate the map and verify + */ + map = port_name_to_map(target_map); + if (map == VM_MAP_NULL) + return MACH_SEND_INTERRUPTED; + + error = vm_behavior_set(map, address, size, new_behavior); + + vm_map_deallocate(map); + return error; +} + diff --git a/kern/syscall_sw.c b/kern/syscall_sw.c index b2e20e6..7635ba2 100644 --- a/kern/syscall_sw.c +++ b/kern/syscall_sw.c @@ -73,6 +73,7 @@ kern_return_t kern_invalid() extern kern_return_t syscall_vm_map(); extern kern_return_t syscall_vm_allocate(); extern kern_return_t syscall_vm_deallocate(); +extern kern_return_t syscall_vm_behavior_set(); extern kern_return_t syscall_task_create(); extern kern_return_t syscall_task_terminate(); @@ -164,7 +165,7 @@ mach_trap_t mach_trap_table[] = { MACH_TRAP(syscall_vm_map, 11), /* 64 */ MACH_TRAP(syscall_vm_allocate, 4), /* 65 */ MACH_TRAP(syscall_vm_deallocate, 3), /* 66 */ - MACH_TRAP(kern_invalid, 0), /* 67 */ + MACH_TRAP(syscall_vm_behavior_set, 4), /* 67 */ MACH_TRAP(syscall_task_create, 3), /* 68 */ MACH_TRAP(syscall_task_terminate, 1), /* 69 */ diff --git a/vm/memory_object.c b/vm/memory_object.c index 57dde76..1410b5f 100644 --- a/vm/memory_object.c +++ b/vm/memory_object.c @@ -73,6 +73,7 @@ typedef int memory_object_lock_result_t; /* moved from below */ ipc_port_t memory_manager_default = IP_NULL; +vm_size_t memory_manager_default_cluster = 0; decl_simple_lock_data(,memory_manager_default_lock) /* @@ -919,13 +920,15 @@ xxx_memory_object_lock_request(object, offset, size, reply_to, reply_to_type)); } + kern_return_t memory_object_set_attributes_common(object, object_ready, may_cache, - copy_strategy, use_old_pageout) + copy_strategy, cluster_size, use_old_pageout) vm_object_t object; boolean_t object_ready; boolean_t may_cache; memory_object_copy_strategy_t copy_strategy; + vm_size_t cluster_size; boolean_t use_old_pageout; { if (object == VM_OBJECT_NULL) @@ -951,11 +954,23 @@ memory_object_set_attributes_common(object, object_ready, may_cache, if (may_cache) may_cache = TRUE; + if(cluster_size != 0){ + int pages_per_cluster; + pages_per_cluster = atop(cluster_size); + if ((cluster_size & (PAGE_SIZE-1)) || + ((pages_per_cluster-1) & pages_per_cluster)){ + vm_object_deallocate(object); + return(KERN_INVALID_ARGUMENT); + } + + } + vm_object_lock(object); /* * Wake up anyone waiting for the ready attribute * to become asserted. 
+ * XXX Shouldn't that be done after setting all the object's attributes */ if (object_ready && !object->pager_ready) { @@ -969,6 +984,11 @@ memory_object_set_attributes_common(object, object_ready, may_cache, object->can_persist = may_cache; object->pager_ready = object_ready; + + if (cluster_size == 0){ + cluster_size = PAGE_SIZE; + } + object->cluster_size = cluster_size; if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) { object->temporary = TRUE; } else { @@ -988,10 +1008,11 @@ memory_object_set_attributes_common(object, object_ready, may_cache, */ kern_return_t memory_object_change_attributes(object, may_cache, - copy_strategy, reply_to, reply_to_type) + copy_strategy, cluster_size, reply_to, reply_to_type) vm_object_t object; boolean_t may_cache; memory_object_copy_strategy_t copy_strategy; + vm_size_t cluster_size; ipc_port_t reply_to; mach_msg_type_name_t reply_to_type; { @@ -1009,13 +1030,13 @@ kern_return_t memory_object_change_attributes(object, may_cache, */ result = memory_object_set_attributes_common(object, TRUE, may_cache, copy_strategy, - FALSE); + cluster_size, FALSE); if (IP_VALID(reply_to)) { /* consumes our naked send-once/send right for reply_to */ (void) memory_object_change_completed(reply_to, reply_to_type, - may_cache, copy_strategy); + may_cache, copy_strategy, cluster_size); } @@ -1023,33 +1044,36 @@ kern_return_t memory_object_change_attributes(object, may_cache, } kern_return_t -memory_object_set_attributes(object, object_ready, may_cache, copy_strategy) +memory_object_set_attributes(object, object_ready, may_cache, copy_strategy, cluster_size) vm_object_t object; boolean_t object_ready; boolean_t may_cache; memory_object_copy_strategy_t copy_strategy; + vm_size_t cluster_size; { return memory_object_set_attributes_common(object, object_ready, may_cache, copy_strategy, - TRUE); + cluster_size, TRUE); } -kern_return_t memory_object_ready(object, may_cache, copy_strategy) +kern_return_t memory_object_ready(object, may_cache, copy_strategy, cluster_size) vm_object_t object; boolean_t may_cache; memory_object_copy_strategy_t copy_strategy; + vm_size_t cluster_size; { return memory_object_set_attributes_common(object, TRUE, may_cache, copy_strategy, - FALSE); + cluster_size, FALSE); } kern_return_t memory_object_get_attributes(object, object_ready, - may_cache, copy_strategy) + may_cache, copy_strategy, cluster_size) vm_object_t object; boolean_t *object_ready; boolean_t *may_cache; memory_object_copy_strategy_t *copy_strategy; + vm_size_t *cluster_size; { if (object == VM_OBJECT_NULL) return(KERN_INVALID_ARGUMENT); @@ -1058,6 +1082,7 @@ kern_return_t memory_object_get_attributes(object, object_ready, *may_cache = object->can_persist; *object_ready = object->pager_ready; *copy_strategy = object->copy_strategy; + *cluster_size = object->cluster_size; vm_object_unlock(object); vm_object_deallocate(object); @@ -1121,7 +1146,7 @@ kern_return_t vm_set_default_memory_manager(host, default_manager) * valid (not IP_NULL or IP_DEAD). 
*/ -ipc_port_t memory_manager_default_reference(void) +ipc_port_t memory_manager_default_reference(vm_size_t *cluster_size) { ipc_port_t current_manager; @@ -1134,6 +1159,7 @@ ipc_port_t memory_manager_default_reference(void) FALSE); simple_lock(&memory_manager_default_lock); } + *cluster_size = memory_manager_default_cluster; simple_unlock(&memory_manager_default_lock); diff --git a/vm/memory_object.h b/vm/memory_object.h index ee0c963..eed2572 100644 --- a/vm/memory_object.h +++ b/vm/memory_object.h @@ -30,7 +30,7 @@ #include #include -extern ipc_port_t memory_manager_default_reference(void); +extern ipc_port_t memory_manager_default_reference(vm_size_t *cluster_size); extern boolean_t memory_manager_default_port(ipc_port_t); extern void memory_manager_default_init(void); diff --git a/vm/vm_fault.c b/vm/vm_fault.c index cce043a..2071d4b 100644 --- a/vm/vm_fault.c +++ b/vm/vm_fault.c @@ -61,6 +61,8 @@ +#if 0 + /* * State needed by vm_fault_continue. * This is a little hefty to drop directly @@ -87,6 +89,8 @@ typedef struct vm_fault_state { zone_t vm_fault_state_zone = 0; +#endif + int vm_object_absent_max = 50; int vm_fault_debug = 0; @@ -107,10 +111,12 @@ extern struct db_watchpoint *db_watchpoint_list; */ void vm_fault_init(void) { + #if 0 vm_fault_state_zone = zinit(sizeof(vm_fault_state_t), 0, THREAD_MAX * sizeof(vm_fault_state_t), sizeof(vm_fault_state_t), 0, "vm fault state"); + #endif } /* @@ -166,6 +172,15 @@ vm_fault_cleanup(object, top_page) #define vm_stat_sample(x) #endif /* MACH_PCSAMPLE */ +/* XXX - temporary */ +boolean_t vm_allow_clustered_pagein = TRUE; +int vm_pagein_cluster_used = 0; + +/* + * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior + */ +int vm_default_ahead = 1; /* Number of pages to prepage ahead */ +int vm_default_behind = 0; /* Number of pages to prepage behind */ /* @@ -209,15 +224,18 @@ vm_fault_cleanup(object, top_page) */ vm_fault_return_t vm_fault_page(first_object, first_offset, fault_type, must_be_resident, interruptible, + lo_offset, hi_offset, behavior, protection, - result_page, top_page, - resume, continuation) + result_page, top_page) /* Arguments: */ vm_object_t first_object; /* Object to begin search */ vm_offset_t first_offset; /* Offset into object */ vm_prot_t fault_type; /* What access is requested */ boolean_t must_be_resident;/* Must page be resident? */ boolean_t interruptible; /* May fault be interrupted? */ + vm_offset_t lo_offset; + vm_offset_t hi_offset; + vm_behavior_t behavior; /* Modifies in place: */ vm_prot_t *protection; /* Protection for mapping */ /* Returns: */ @@ -225,9 +243,6 @@ vm_fault_return_t vm_fault_page(first_object, first_offset, vm_page_t *top_page; /* Page in top object, if * not result_page. */ - /* More arguments: */ - boolean_t resume; /* We are restarting. */ - void (*continuation)(); /* Continuation for blocking. 
*/ { register vm_page_t m; @@ -240,21 +255,9 @@ vm_fault_return_t vm_fault_page(first_object, first_offset, vm_object_t copy_object; boolean_t look_for_page; vm_prot_t access_required; - - if (resume) { - register vm_fault_state_t *state = - (vm_fault_state_t *) current_thread()->ith_other; - - if (state->vmfp_backoff) - goto after_block_and_backoff; - - object = state->vmfp_object; - offset = state->vmfp_offset; - first_m = state->vmfp_first_m; - access_required = state->vmfp_access; - goto after_thread_block; - } - + vm_size_t cluster_size, length; + vm_offset_t cluster_start, cluster_end, paging_offset; + vm_offset_t cluster_offset, align_offset; vm_stat_sample(SAMPLED_PC_VM_FAULTS_ANY); vm_stat.faults++; /* needs lock XXX */ @@ -358,32 +361,8 @@ vm_fault_return_t vm_fault_page(first_object, first_offset, PAGE_ASSERT_WAIT(m, interruptible); vm_object_unlock(object); - if (continuation != (void (*)()) 0) { - register vm_fault_state_t *state = - (vm_fault_state_t *) current_thread()->ith_other; - - /* - * Save variables in case - * thread_block discards - * our kernel stack. - */ - - state->vmfp_backoff = FALSE; - state->vmfp_object = object; - state->vmfp_offset = offset; - state->vmfp_first_m = first_m; - state->vmfp_access = - access_required; - state->vmf_prot = *protection; - - counter(c_vm_fault_page_block_busy_user++); - thread_block(continuation); - } else - { - counter(c_vm_fault_page_block_busy_kernel++); - thread_block((void (*)()) 0); - } - after_thread_block: + counter(c_vm_fault_page_block_busy_kernel++); + thread_block((void (*)()) 0); wait_result = current_thread()->wait_result; vm_object_lock(object); if (wait_result != THREAD_AWAKENED) { @@ -493,6 +472,10 @@ vm_fault_return_t vm_fault_page(first_object, first_offset, VM_PAGE_QUEUES_REMOVE(m); vm_page_unlock_queues(); } + + hi_offset += object->shadow_offset; + lo_offset += object->shadow_offset; + vm_object_lock(next_object); vm_object_unlock(object); object = next_object; @@ -515,7 +498,9 @@ vm_fault_return_t vm_fault_page(first_object, first_offset, vm_object_assert_wait(object, VM_OBJECT_EVENT_PAGER_READY, interruptible); - goto block_and_backoff; + vm_object_unlock(object); + thread_block((void (*)(void))0); + goto backoff; } new_unlock_request = m->unlock_request = @@ -540,7 +525,9 @@ vm_fault_return_t vm_fault_page(first_object, first_offset, } PAGE_ASSERT_WAIT(m, interruptible); - goto block_and_backoff; + vm_object_unlock(object); + thread_block((void(*)(void))0); + goto backoff; } /* @@ -605,7 +592,9 @@ vm_fault_return_t vm_fault_page(first_object, first_offset, VM_OBJECT_EVENT_PAGER_READY, interruptible); VM_PAGE_FREE(m); - goto block_and_backoff; + vm_object_unlock(object); + thread_block((void(*)(void))0); + goto backoff; } if (object->internal) { @@ -631,7 +620,9 @@ vm_fault_return_t vm_fault_page(first_object, first_offset, vm_object_absent_assert_wait(object, interruptible); VM_PAGE_FREE(m); - goto block_and_backoff; + vm_object_unlock(object); + thread_block((void(*)(void))0); + goto backoff; } /* @@ -642,6 +633,209 @@ vm_fault_return_t vm_fault_page(first_object, first_offset, m->absent = TRUE; object->absent_count++; + cluster_start = offset; + length = PAGE_SIZE; + cluster_size = object->cluster_size; + + /* + * Skip clustered pagein if it is globally disabled + * or random page reference behavior is expected + * for the address range containing the faulting + * address or the object paging block size is + * equal to the page size. 
+ */ + if (!vm_allow_clustered_pagein || + behavior == VM_BEHAVIOR_RANDOM || + cluster_size == PAGE_SIZE) + goto no_clustering; + + assert(offset >= lo_offset); + assert(offset < hi_offset); + + /* + * Decide whether to scan ahead or behind for + * additional pages contiguous to the faulted + * page in the same paging block. The decision + * is based on system wide globals and the + * expected page reference behavior of the + * address range contained the faulting address. + * First calculate some constants. + */ + paging_offset = offset + object->paging_offset; + cluster_offset = paging_offset & (cluster_size - 1); + align_offset = paging_offset&(PAGE_SIZE-1); + if (align_offset != 0) { + cluster_offset = trunc_page(cluster_offset); + } + +#define SPANS_CLUSTER(x) ((((x) - align_offset) & (cluster_size - 1)) == 0) + + /* + * Backward scan only if reverse sequential + * behavior has been specified + */ + if (((vm_default_behind != 0 && + behavior == VM_BEHAVIOR_DEFAULT) || + behavior == VM_BEHAVIOR_RSEQNTL) && offset) { + vm_offset_t cluster_bot; + + /* + * Calculate lower search boundary. + * Exclude pages that span a cluster boundary. + * Clip to start of map entry. + * For default page reference behavior, scan + * default pages behind. + */ + cluster_bot = (offset > cluster_offset) ? + offset - cluster_offset : offset; + if (align_offset != 0) { + if ((cluster_bot < offset) && + SPANS_CLUSTER(cluster_bot)) { + cluster_bot += PAGE_SIZE; + } + } + if (behavior == VM_BEHAVIOR_DEFAULT) { + vm_offset_t bot = vm_default_behind*PAGE_SIZE; + + if (cluster_bot < (offset - bot)) + cluster_bot = offset - bot; + } + if (lo_offset > cluster_bot) + cluster_bot = lo_offset; + + for ( cluster_start = offset - PAGE_SIZE; + (cluster_start >= cluster_bot) && + (cluster_start != (align_offset - PAGE_SIZE)); + cluster_start -= PAGE_SIZE) { +retry_cluster_backw: + if ( +#if MACH_PAGEMAP + (vm_external_state_get(object->existence_info, offset + object->paging_offset) != + VM_EXTERNAL_STATE_ABSENT) || +#endif /* MACH_PAGEMAP */ + (vm_page_lookup(object, cluster_start) + != VM_PAGE_NULL)) { + break; + } + if (object->internal) { + /* + * need to acquire a real page in + * advance because this acts as + * a throttling mechanism for + * data_requests to the default + * pager. If this fails, give up + * trying to find any more pages + * in the cluster and send off the + * request for what we already have. + */ + if ((m = vm_page_grab(!object->internal)) + == VM_PAGE_NULL) { + cluster_start += PAGE_SIZE; + cluster_end = offset + PAGE_SIZE; + goto give_up; + } + } else if ((m = vm_page_grab_fictitious()) + == VM_PAGE_NULL) { + vm_object_unlock(object); + vm_page_more_fictitious(); + vm_object_lock(object); + goto retry_cluster_backw; + } + vm_page_insert(m, object, cluster_start); + m->absent = TRUE; + object->absent_count++; + } + cluster_start += PAGE_SIZE; + assert(cluster_start >= cluster_bot); + } + assert(cluster_start <= offset); + + /* + * Forward scan if default or sequential behavior + * specified + */ + if ((behavior == VM_BEHAVIOR_DEFAULT && + vm_default_ahead != 0) || + behavior == VM_BEHAVIOR_SEQUENTIAL) { + vm_offset_t cluster_top; + + /* + * Calculate upper search boundary. + * Exclude pages that span a cluster boundary. + * Clip to end of map entry. + * For default page reference behavior, scan + * default pages ahead. 
*/ + cluster_top = (offset + cluster_size) - + cluster_offset; + if (align_offset != 0) { + if ((cluster_top > (offset + PAGE_SIZE)) && + SPANS_CLUSTER(cluster_top)) { + cluster_top -= PAGE_SIZE; + } + } + if (behavior == VM_BEHAVIOR_DEFAULT) { + vm_offset_t top = (vm_default_ahead*PAGE_SIZE)+ + PAGE_SIZE; + + if (cluster_top > (offset + top)) + cluster_top = offset + top; + } + if (cluster_top > hi_offset) + cluster_top = hi_offset; + + for (cluster_end = offset + PAGE_SIZE; + cluster_end < cluster_top; + cluster_end += PAGE_SIZE) { + assert(cluster_size > PAGE_SIZE); +retry_cluster_forw: + if ( +#if MACH_PAGEMAP + (vm_external_state_get(object->existence_info, offset + object->paging_offset) != + VM_EXTERNAL_STATE_ABSENT) || +#endif /* MACH_PAGEMAP */ + (vm_page_lookup(object, cluster_end) + != VM_PAGE_NULL)) { + break; + } + if (object->internal) { + /* + * need to acquire a real page in + * advance because this acts as + * a throttling mechanism for + * data_requests to the default + * pager. If this fails, give up + * trying to find any more pages + * in the cluster and send off the + * request for what we already have. + */ + if ((m = vm_page_grab(!object->internal)) + == VM_PAGE_NULL) { + break; + } + } else if ((m = vm_page_grab_fictitious()) + == VM_PAGE_NULL) { + vm_object_unlock(object); + /* The same as above */ + vm_page_more_fictitious(); + vm_object_lock(object); + goto retry_cluster_forw; + } + vm_page_insert(m, object, cluster_end); + m->absent = TRUE; + object->absent_count++; + } + assert(cluster_end <= cluster_top); + } + else { + cluster_end = offset + PAGE_SIZE; + } +give_up: + assert(cluster_end >= offset + PAGE_SIZE); + length = cluster_end - cluster_start; + +no_clustering: + /* * We have a busy page, so we can * release the object lock. @@ -657,8 +851,8 @@ vm_fault_return_t vm_fault_page(first_object, first_offset, if ((rc = memory_object_data_request(object->pager, object->pager_request, - m->offset + object->paging_offset, - PAGE_SIZE, access_required)) != KERN_SUCCESS) { + cluster_start + object->paging_offset, + length, access_required)) != KERN_SUCCESS) { if (rc != MACH_SEND_INTERRUPTED) printf("%s(0x%p, 0x%p, 0x%x, 0x%x, 0x%x) failed, %x\n", "memory_object_data_request", @@ -672,9 +866,17 @@ vm_fault_return_t vm_fault_page(first_object, first_offset, * so check if it's still there and busy. */ vm_object_lock(object); - if (m == vm_page_lookup(object,offset) && - m->absent && m->busy) - VM_PAGE_FREE(m); + for (; length; + length -= PAGE_SIZE, + cluster_start += PAGE_SIZE) { + vm_page_t p; + if ((p = vm_page_lookup(object, + cluster_start)) + && p->absent && p->busy + && p != first_m) { + VM_PAGE_FREE(p); + } + } vm_fault_cleanup(object, first_m); return((rc == MACH_SEND_INTERRUPTED) ? VM_FAULT_INTERRUPTED : @@ -752,6 +954,11 @@ vm_fault_return_t vm_fault_page(first_object, first_offset, if ((object != first_object) || must_be_resident) vm_object_paging_end(object); vm_object_unlock(object); + hi_offset += object->shadow_offset; + lo_offset += object->shadow_offset; + access_required = VM_PROT_READ; + vm_object_lock(next_object); + vm_object_unlock(object); object = next_object; vm_object_paging_begin(object); } @@ -940,8 +1147,14 @@ vm_fault_return_t vm_fault_page(first_object, first_offset, * Does the page exist in the copy?
*/ copy_offset = first_offset - copy_object->shadow_offset; - copy_m = vm_page_lookup(copy_object, copy_offset); - if (copy_m != VM_PAGE_NULL) { + + if (copy_object->size <= copy_offset) + /* + * Copy object doesn't cover this page -- do nothing. + */ + ; + + else if ((copy_m = vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL){ if (copy_m->busy) { /* * If the page is being brought @@ -952,10 +1165,27 @@ vm_fault_return_t vm_fault_page(first_object, first_offset, copy_object->ref_count--; assert(copy_object->ref_count > 0); vm_object_unlock(copy_object); - goto block_and_backoff; - } + thread_block((void(*)(void))0); + goto backoff; + } } - else { + else +#if MACH_PAGEMAP + if (vm_external_state_get(object->existence_info, offset + object->paging_offset) + != VM_EXTERNAL_STATE_EXISTS) +#endif + { + /* + * If PAGED_OUT is TRUE, then the page used to exist + * in the copy-object, and has already been paged out. + * We don't need to repeat this. If PAGED_OUT is + * FALSE, then either we don't know (!pager_created, + * for example) or it hasn't been paged out. + * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT) + * We must copy the page to the copy object. + */ + + /* * Allocate a page for the copy */ @@ -1087,28 +1317,13 @@ vm_fault_return_t vm_fault_page(first_object, first_offset, return(VM_FAULT_SUCCESS); +#if 0 block_and_backoff: vm_fault_cleanup(object, first_m); - - if (continuation != (void (*)()) 0) { - register vm_fault_state_t *state = - (vm_fault_state_t *) current_thread()->ith_other; - - /* - * Save variables in case we must restart. - */ - - state->vmfp_backoff = TRUE; - state->vmf_prot = *protection; - - counter(c_vm_fault_page_block_backoff_user++); - thread_block(continuation); - } else - { - counter(c_vm_fault_page_block_backoff_kernel++); - thread_block((void (*)()) 0); - } - after_block_and_backoff: + counter(c_vm_fault_page_block_backoff_kernel++); + thread_block((void (*)()) 0); +#endif + backoff: if (current_thread()->wait_result == THREAD_AWAKENED) return VM_FAULT_RETRY; else @@ -1137,34 +1352,20 @@ vm_fault_return_t vm_fault_page(first_object, first_offset, * and deallocated when leaving vm_fault. */ -void -vm_fault_continue() -{ - register vm_fault_state_t *state = - (vm_fault_state_t *) current_thread()->ith_other; - - (void) vm_fault(state->vmf_map, - state->vmf_vaddr, - state->vmf_fault_type, - state->vmf_change_wiring, - TRUE, state->vmf_continuation); - /*NOTREACHED*/ -} -kern_return_t vm_fault(map, vaddr, fault_type, change_wiring, - resume, continuation) +kern_return_t vm_fault(map, vaddr, fault_type, change_wiring) vm_map_t map; vm_offset_t vaddr; vm_prot_t fault_type; boolean_t change_wiring; - boolean_t resume; - void (*continuation)(); { vm_map_version_t version; /* Map version for verificiation */ boolean_t wired; /* Should mapping be wired down? */ vm_object_t object; /* Top-level object */ vm_offset_t offset; /* Top-level offset */ vm_prot_t prot; /* Protection for mapping */ + vm_behavior_t behavior; /* Expected paging behavior */ + vm_offset_t lo_offset, hi_offset; vm_object_t old_copy_object; /* Saved copy object */ vm_page_t result_page; /* Result of vm_fault_page */ vm_page_t top_page; /* Placeholder page */ @@ -1173,49 +1374,6 @@ kern_return_t vm_fault(map, vaddr, fault_type, change_wiring, register vm_page_t m; /* Fast access to result_page */ - if (resume) { - register vm_fault_state_t *state = - (vm_fault_state_t *) current_thread()->ith_other; - - /* - * Retrieve cached variables and - * continue vm_fault_page. 
- */ - - object = state->vmf_object; - if (object == VM_OBJECT_NULL) - goto RetryFault; - version = state->vmf_version; - wired = state->vmf_wired; - offset = state->vmf_offset; - prot = state->vmf_prot; - - kr = vm_fault_page(object, offset, fault_type, - (change_wiring && !wired), !change_wiring, - &prot, &result_page, &top_page, - TRUE, vm_fault_continue); - goto after_vm_fault_page; - } - - if (continuation != (void (*)()) 0) { - /* - * We will probably need to save state. - */ - - char * state; - - /* - * if this assignment stmt is written as - * 'active_threads[cpu_number()] = zalloc()', - * cpu_number may be evaluated before zalloc; - * if zalloc blocks, cpu_number will be wrong - */ - - state = (char *) zalloc(vm_fault_state_zone); - current_thread()->ith_other = state; - - } - RetryFault: ; /* @@ -1224,8 +1382,8 @@ kern_return_t vm_fault(map, vaddr, fault_type, change_wiring, */ if ((kr = vm_map_lookup(&map, vaddr, fault_type, &version, - &object, &offset, - &prot, &wired)) != KERN_SUCCESS) { + &object, &offset, &prot, &wired, + &behavior, &lo_offset, &hi_offset)) != KERN_SUCCESS) { goto done; } @@ -1249,39 +1407,10 @@ kern_return_t vm_fault(map, vaddr, fault_type, change_wiring, object->ref_count++; vm_object_paging_begin(object); - if (continuation != (void (*)()) 0) { - register vm_fault_state_t *state = - (vm_fault_state_t *) current_thread()->ith_other; - - /* - * Save variables, in case vm_fault_page discards - * our kernel stack and we have to restart. - */ - - state->vmf_map = map; - state->vmf_vaddr = vaddr; - state->vmf_fault_type = fault_type; - state->vmf_change_wiring = change_wiring; - state->vmf_continuation = continuation; - - state->vmf_version = version; - state->vmf_wired = wired; - state->vmf_object = object; - state->vmf_offset = offset; - state->vmf_prot = prot; - - kr = vm_fault_page(object, offset, fault_type, - (change_wiring && !wired), !change_wiring, - &prot, &result_page, &top_page, - FALSE, vm_fault_continue); - } else - { - kr = vm_fault_page(object, offset, fault_type, - (change_wiring && !wired), !change_wiring, - &prot, &result_page, &top_page, - FALSE, (void (*)()) 0); - } - after_vm_fault_page: + kr = vm_fault_page(object, offset, fault_type, + (change_wiring && !wired), !change_wiring, + lo_offset, hi_offset, behavior, + &prot, &result_page, &top_page); /* * If we didn't succeed, lose the object reference immediately. @@ -1303,25 +1432,7 @@ kern_return_t vm_fault(map, vaddr, fault_type, change_wiring, kr = KERN_SUCCESS; goto done; case VM_FAULT_MEMORY_SHORTAGE: - if (continuation != (void (*)()) 0) { - register vm_fault_state_t *state = - (vm_fault_state_t *) current_thread()->ith_other; - - /* - * Save variables in case VM_PAGE_WAIT - * discards our kernel stack. 
- */ - - state->vmf_map = map; - state->vmf_vaddr = vaddr; - state->vmf_fault_type = fault_type; - state->vmf_change_wiring = change_wiring; - state->vmf_continuation = continuation; - state->vmf_object = VM_OBJECT_NULL; - - VM_PAGE_WAIT(vm_fault_continue); - } else - VM_PAGE_WAIT((void (*)()) 0); + VM_PAGE_WAIT((void (*)()) 0); goto RetryFault; case VM_FAULT_FICTITIOUS_SHORTAGE: vm_page_more_fictitious(); @@ -1387,7 +1498,7 @@ kern_return_t vm_fault(map, vaddr, fault_type, change_wiring, kr = vm_map_lookup(&map, vaddr, fault_type & ~VM_PROT_WRITE, &version, &retry_object, &retry_offset, &retry_prot, - &wired); + &wired, &behavior, &lo_offset, &hi_offset); if (kr != KERN_SUCCESS) { vm_object_lock(m->object); @@ -1486,15 +1597,6 @@ kern_return_t vm_fault(map, vaddr, fault_type, change_wiring, #undef RELEASE_PAGE done: - if (continuation != (void (*)()) 0) { - register vm_fault_state_t *state = - (vm_fault_state_t *) current_thread()->ith_other; - - zfree(vm_fault_state_zone, (vm_offset_t) state); - (*continuation)(kr); - /*NOTREACHED*/ - } - return(kr); } @@ -1531,8 +1633,7 @@ void vm_fault_wire(map, entry) for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) { if (vm_fault_wire_fast(map, va, entry) != KERN_SUCCESS) - (void) vm_fault(map, va, VM_PROT_NONE, TRUE, - FALSE, (void (*)()) 0); + (void) vm_fault(map, va, VM_PROT_NONE, TRUE); } } @@ -1565,8 +1666,7 @@ void vm_fault_unwire(map, entry) if (object == VM_OBJECT_NULL) { vm_map_lock_set_recursive(map); - (void) vm_fault(map, va, VM_PROT_NONE, TRUE, - FALSE, (void (*)()) 0); + (void) vm_fault(map, va, VM_PROT_NONE, TRUE); vm_map_lock_clear_recursive(map); } else { vm_prot_t prot; @@ -1583,10 +1683,11 @@ void vm_fault_unwire(map, entry) entry->offset + (va - entry->vme_start), VM_PROT_NONE, TRUE, - FALSE, &prot, - &result_page, - &top_page, - FALSE, (void (*)()) 0); + FALSE, + entry->offset, + entry->offset + (entry->vme_end - entry->vme_start), + entry->behavior, &prot, + &result_page, &top_page); } while (result == VM_FAULT_RETRY); if (result != VM_FAULT_SUCCESS) @@ -1851,6 +1952,13 @@ kern_return_t vm_fault_copy( vm_size_t amount_done; vm_object_t old_copy_object; + /* + * In order not to confuse the clustered pageins, align + * the different offsets on a page boundary. 
+ */ + vm_offset_t dst_lo_offset = trunc_page(dst_offset); + vm_offset_t dst_hi_offset = round_page(dst_offset + *src_size); + #define RETURN(x) \ MACRO_BEGIN \ *src_size = amount_done; \ @@ -1877,8 +1985,8 @@ kern_return_t vm_fault_copy( switch (vm_fault_page(src_object, src_offset, VM_PROT_READ, FALSE, interruptible, - &prot, &result_page, &src_top_page, - FALSE, (void (*)()) 0)) { + dst_lo_offset, dst_hi_offset, VM_BEHAVIOR_SEQUENTIAL, + &prot, &result_page, &src_top_page)) { case VM_FAULT_SUCCESS: break; @@ -1915,9 +2023,8 @@ kern_return_t vm_fault_copy( switch (vm_fault_page(dst_object, dst_offset, VM_PROT_WRITE, FALSE, FALSE /* interruptible */, - &prot, &result_page, &dst_top_page, - FALSE, (void (*)()) 0)) { - + dst_lo_offset, dst_hi_offset, VM_BEHAVIOR_SEQUENTIAL, + &prot, &result_page, &dst_top_page)) { case VM_FAULT_SUCCESS: break; case VM_FAULT_RETRY: diff --git a/vm/vm_fault.h b/vm/vm_fault.h index 0492ccf..d1f78c5 100644 --- a/vm/vm_fault.h +++ b/vm/vm_fault.h @@ -34,6 +34,7 @@ #include #include +#include #include #include @@ -51,17 +52,16 @@ typedef kern_return_t vm_fault_return_t; extern void vm_fault_init(void); extern vm_fault_return_t vm_fault_page(vm_object_t, vm_offset_t, vm_prot_t, - boolean_t, boolean_t, vm_prot_t *, - vm_page_t *, vm_page_t *, boolean_t, - void (*)()); + boolean_t, boolean_t, + vm_offset_t, vm_offset_t, vm_behavior_t, + vm_prot_t *, vm_page_t *, vm_page_t *); extern void vm_fault_cleanup(vm_object_t, vm_page_t); /* * Page fault handling based on vm_map (or entries therein) */ -extern kern_return_t vm_fault(vm_map_t, vm_offset_t, vm_prot_t, boolean_t, - boolean_t, void (*)()); +extern kern_return_t vm_fault(vm_map_t, vm_offset_t, vm_prot_t, boolean_t); extern void vm_fault_wire(vm_map_t, vm_map_entry_t); extern void vm_fault_unwire(vm_map_t, vm_map_entry_t); diff --git a/vm/vm_map.c b/vm/vm_map.c index dc2388d..d4563ca 100644 --- a/vm/vm_map.c +++ b/vm/vm_map.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -594,6 +595,7 @@ kern_return_t vm_map_find_entry(map, address, size, mask, object, o_entry) (entry->inheritance == VM_INHERIT_DEFAULT) && (entry->protection == VM_PROT_DEFAULT) && (entry->max_protection == VM_PROT_ALL) && + (entry->behavior == VM_BEHAVIOR_DEFAULT) && (entry->wired_count == 1) && (entry->user_wired_count == 0) && (entry->projected_on == 0)) { @@ -620,6 +622,7 @@ kern_return_t vm_map_find_entry(map, address, size, mask, object, o_entry) new_entry->inheritance = VM_INHERIT_DEFAULT; new_entry->protection = VM_PROT_DEFAULT; new_entry->max_protection = VM_PROT_ALL; + new_entry->behavior = VM_BEHAVIOR_DEFAULT; new_entry->wired_count = 1; new_entry->user_wired_count = 0; @@ -3758,8 +3761,9 @@ retry: kr = vm_fault_page(src_object, src_offset, VM_PROT_READ, FALSE, FALSE, - &result_prot, &m, &top_page, - FALSE, (void (*)()) 0); + src_entry->offset, src_entry->offset + + (src_entry->vme_end - src_entry->vme_start), + VM_BEHAVIOR_SEQUENTIAL, &result_prot, &m, &top_page); /* * Cope with what happened. */ @@ -4325,7 +4329,8 @@ vm_map_t vm_map_fork(old_map) * remain the same. 
*/ kern_return_t vm_map_lookup(var_map, vaddr, fault_type, out_version, - object, offset, out_prot, wired) + object, offset, out_prot, wired, + behavior, lo_offset, hi_offset) vm_map_t *var_map; /* IN/OUT */ register vm_offset_t vaddr; register vm_prot_t fault_type; @@ -4335,6 +4340,9 @@ kern_return_t vm_map_lookup(var_map, vaddr, fault_type, out_version, vm_offset_t *offset; /* OUT */ vm_prot_t *out_prot; /* OUT */ boolean_t *wired; /* OUT */ + vm_behavior_t *behavior; /* OUT */ + vm_offset_t *lo_offset; /* OUT */ + vm_offset_t *hi_offset; /* OUT */ { register vm_map_entry_t entry; register vm_map_t map = *var_map; @@ -4481,6 +4489,9 @@ kern_return_t vm_map_lookup(var_map, vaddr, fault_type, out_version, *offset = (vaddr - entry->vme_start) + entry->offset; *object = entry->object.vm_object; *out_prot = prot; + *behavior = entry->behavior; + *lo_offset = entry->offset; + *hi_offset = (entry->vme_end - entry->vme_start) + entry->offset; /* * Lock the object to prevent it from disappearing @@ -4696,6 +4707,126 @@ kern_return_t vm_map_machine_attribute(map, address, size, attribute, value) return ret; } +/* + * vm_map_range_check: [ internal use only ] + * + * Check that the region defined by the specified start and + * end addresses is wholly contained within a single map + * entry or set of adjacent map entries of the specified map, + * i.e. the specified region contains no unmapped space. + * If any or all of the region is unmapped, FALSE is returned. + * Otherwise, TRUE is returned and if the output argument 'entry' + * is not NULL it points to the map entry containing the start + * of the region. + * + * The map is locked for reading on entry and is left locked. + */ +boolean_t +vm_map_range_check( + register vm_map_t map, + register vm_offset_t start, + register vm_offset_t end, + vm_map_entry_t *entry) +{ + vm_map_entry_t cur; + register vm_offset_t prev; + + /* + * Basic sanity checks first + */ + if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) + return (FALSE); + + /* + * Check first if the region starts within a valid + * mapping for the map. + */ + if (!vm_map_lookup_entry(map, start, &cur)) + return (FALSE); + + /* + * Optimize for the case that the region is contained + * in a single map entry. + */ + if (entry != (vm_map_entry_t *) NULL) + *entry = cur; + if (end <= cur->vme_end) + return (TRUE); + + /* + * If the region is not wholly contained within a + * single entry, walk the entries looking for holes. + */ + prev = cur->vme_end; + cur = cur->vme_next; + while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) { + if (end <= cur->vme_end) + return (TRUE); + prev = cur->vme_end; + cur = cur->vme_next; + } + return (FALSE); +} + + +/* + * vm_map_behavior_set: + * + * Sets the paging reference behavior of the specified address + * range in the target map. Paging reference behavior affects + * how pagein operations resulting from faults on the map will be + * clustered. + */ +kern_return_t +vm_map_behavior_set( + vm_map_t map, + vm_offset_t start, + vm_offset_t end, + vm_behavior_t new_behavior) +{ + register vm_map_entry_t entry; + vm_map_entry_t temp_entry; + + switch (new_behavior) { + case VM_BEHAVIOR_DEFAULT: + case VM_BEHAVIOR_RANDOM: + case VM_BEHAVIOR_SEQUENTIAL: + case VM_BEHAVIOR_RSEQNTL: + break; + default: + return(KERN_INVALID_ARGUMENT); + } + + vm_map_lock(map); + + /* + * The entire address range must be valid for the map.
* Note that vm_map_range_check() does a + * vm_map_lookup_entry() internally and returns the + * entry containing the start of the address range if + * the entire range is valid. + */ + if (vm_map_range_check(map, start, end, &temp_entry)) { + entry = temp_entry; + vm_map_clip_start(map, entry, start); + } + else { + vm_map_unlock(map); + return(KERN_INVALID_ADDRESS); + } + + while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { + vm_map_clip_end(map, entry, end); + + entry->behavior = new_behavior; + + entry = entry->vme_next; + } + + vm_map_unlock(map); + return(KERN_SUCCESS); +} + #if MACH_KDB diff --git a/vm/vm_map.h b/vm/vm_map.h index 567fe93..236d3c6 100644 --- a/vm/vm_map.h +++ b/vm/vm_map.h @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -121,6 +122,7 @@ struct vm_map_entry { -1 for non-persistent kernel map projected buffer entry; pointer to corresponding kernel map entry for user map projected buffer entry */ + vm_behavior_t behavior; }; typedef struct vm_map_entry *vm_map_entry_t; @@ -392,7 +394,8 @@ extern void vm_map_print(vm_map_t); /* Look up an address */ extern kern_return_t vm_map_lookup(vm_map_t *, vm_offset_t, vm_prot_t, vm_map_version_t *, vm_object_t *, - vm_offset_t *, vm_prot_t *, boolean_t *); + vm_offset_t *, vm_prot_t *, boolean_t *, + vm_behavior_t *, vm_offset_t *, vm_offset_t *); /* Find a map entry */ extern boolean_t vm_map_lookup_entry(vm_map_t, vm_offset_t, vm_map_entry_t *); @@ -515,6 +518,28 @@ extern kern_return_t vm_map_submap( #define vm_map_entry_wakeup(map) thread_wakeup((event_t)&(map)->hdr) /* + * vm_map_range_check: [ internal use only ] + * + * Check that the region defined by the specified start and + * end addresses is wholly contained within a single map + * entry or set of adjacent map entries of the specified map, + * i.e. the specified region contains no unmapped space. + * If any or all of the region is unmapped, FALSE is returned. + * Otherwise, TRUE is returned and if the output argument 'entry' + * is not NULL it points to the map entry containing the start + * of the region. + * + * The map is locked for reading on entry and is left locked. + */ +boolean_t +vm_map_range_check( + register vm_map_t map, + register vm_offset_t start, + register vm_offset_t end, + vm_map_entry_t *entry); + + +/* + * This routine is called only when it is known that + * the entry must be split.
*/ diff --git a/vm/vm_object.c b/vm/vm_object.c index 9057973..6092e58 100644 --- a/vm/vm_object.c +++ b/vm/vm_object.c @@ -293,6 +293,7 @@ void vm_object_bootstrap(void) vm_object_template->lock_restart = FALSE; vm_object_template->use_old_pageout = TRUE; /* XXX change later */ vm_object_template->last_alloc = (vm_offset_t) 0; + vm_object_template->cluster_size = 0; #if MACH_PAGEMAP vm_object_template->existence_info = VM_EXTERNAL_NULL; @@ -1066,6 +1067,9 @@ kern_return_t vm_object_copy_slowly( vm_object_t new_object; vm_offset_t new_offset; + vm_offset_t src_lo_offset = src_offset; + vm_offset_t src_hi_offset = src_offset + size; + if (size == 0) { vm_object_unlock(src_object); *_result_object = VM_OBJECT_NULL; @@ -1118,8 +1122,8 @@ kern_return_t vm_object_copy_slowly( result = vm_fault_page(src_object, src_offset, VM_PROT_READ, FALSE, interruptible, - &prot, &_result_page, &top_page, - FALSE, (void (*)()) 0); + src_lo_offset, src_hi_offset, VM_BEHAVIOR_SEQUENTIAL, + &prot, &_result_page, &top_page); switch(result) { case VM_FAULT_SUCCESS: @@ -1993,6 +1997,7 @@ vm_object_t vm_object_enter( vm_object_t object; vm_object_t new_object; boolean_t must_init; + vm_size_t cluster_size; ipc_kobject_type_t po; restart: @@ -2131,7 +2136,7 @@ restart: if (internal) { /* acquire a naked send right for the DMM */ - ipc_port_t DMM = memory_manager_default_reference(); + ipc_port_t DMM = memory_manager_default_reference(&cluster_size); /* mark the object internal */ object->internal = TRUE; @@ -2556,6 +2561,7 @@ void vm_object_collapse( object->pager_created = backing_object->pager_created; object->pager_request = backing_object->pager_request; + object->cluster_size = backing_object->cluster_size; if (object->pager_request != IP_NULL) ipc_kobject_set(object->pager_request, (ipc_kobject_t) object, @@ -2965,8 +2971,8 @@ void vm_object_print( if (object == VM_OBJECT_NULL) return; - iprintf("Object 0x%X: size=0x%X", - (vm_offset_t) object, (vm_offset_t) object->size); + iprintf("Object 0x%X: size=0x%X, cluster_size=0x%X", + (vm_offset_t) object, (vm_offset_t) object->size, (vm_offset_t) object->cluster_size); printf(", %d references, %d resident pages,", object->ref_count, object->resident_page_count); printf(" %d absent pages,", object->absent_count); diff --git a/vm/vm_object.h b/vm/vm_object.h index c992570..4690d92 100644 --- a/vm/vm_object.h +++ b/vm/vm_object.h @@ -155,6 +155,7 @@ struct vm_object { * of their can_persist value */ vm_offset_t last_alloc; /* last allocation offset */ + vm_size_t cluster_size; /* size of paging cluster */ #if MACH_PAGEMAP vm_external_t existence_info; #endif /* MACH_PAGEMAP */ diff --git a/vm/vm_user.c b/vm/vm_user.c index 672daab..ecdf37e 100644 --- a/vm/vm_user.c +++ b/vm/vm_user.c @@ -417,3 +417,24 @@ kern_return_t vm_wire(host, map, start, size, access) round_page(start+size), access); } + +/* + * vm_behavior_set sets the paging behavior attribute for the + * specified range in the specified map. This routine will fail + * with KERN_INVALID_ADDRESS if any address in [start,start+size) + * is not a valid allocated or reserved memory region. + */ +kern_return_t +vm_behavior_set( + vm_map_t map, + vm_offset_t start, + vm_size_t size, + vm_behavior_t new_behavior) +{ + if (map == VM_MAP_NULL) + return(KERN_INVALID_ARGUMENT); + + return(vm_map_behavior_set(map, trunc_page(start), + round_page(start+size), new_behavior)); +} +
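
Usage note (not part of the patch): below is a minimal user-space sketch of how the new interface could be exercised once the MIG user stubs have been regenerated from the updated mach.defs. The vm_behavior_set() routine, its argument order, and the VM_BEHAVIOR_* constants come from this patch; vm_allocate(), vm_deallocate(), and mach_task_self() are standard Mach calls, and the exact header names (<mach.h>, <mach/vm_behavior.h>) are assumptions that depend on how the installed headers are laid out.

/*
 * Illustrative sketch only: allocate a region and declare
 * forward-sequential reference behavior for it, so that faults taken
 * while walking it are clustered by vm_fault_page() instead of being
 * serviced one page at a time.
 */
#include <mach.h>			/* assumed umbrella header */
#include <mach/vm_behavior.h>		/* VM_BEHAVIOR_* from this patch */
#include <stdio.h>

int
main(void)
{
	vm_address_t	addr = 0;
	vm_size_t	size = 16 * 1024 * 1024;	/* 16 MB */
	kern_return_t	kr;

	kr = vm_allocate(mach_task_self(), &addr, size, TRUE);
	if (kr != KERN_SUCCESS) {
		fprintf(stderr, "vm_allocate: %d\n", kr);
		return 1;
	}

	/* Declare the expected reference pattern for the whole region. */
	kr = vm_behavior_set(mach_task_self(), addr, size,
			     VM_BEHAVIOR_SEQUENTIAL);
	if (kr != KERN_SUCCESS)
		fprintf(stderr, "vm_behavior_set: %d\n", kr);

	/* ... walk the region front to back ... */

	(void) vm_deallocate(mach_task_self(), addr, size);
	return 0;
}

Per vm_map_behavior_set() above, the call fails with KERN_INVALID_ADDRESS if any address in [addr, addr+size) is unmapped, and with KERN_INVALID_ARGUMENT for a behavior value other than the four defined in mach/vm_behavior.h; a later call on the same range can switch it back to VM_BEHAVIOR_DEFAULT or to VM_BEHAVIOR_RANDOM to suppress clustering.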