Merge git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 25 May 2012 22:59:38 +0000 (15:59 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 25 May 2012 22:59:38 +0000 (15:59 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 May 2012 22:59:38 +0000 (15:59 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 May 2012 22:59:38 +0000 (15:59 -0700)
diff --combined arch/tile/Kconfig

index 6ad6219fc47e0bf4a0751ec862dffa2a6a115fd3,0294b212e57d955fef3a3d1ac8da9540fb8ee106..fe128816c448a5f593f419c8b695461bebcd74ec
--- 1/arch/tile/Kconfig
--- 2/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@@ -14,7 -14,6 +14,7 @@@ config TIL
         select HAVE_SYSCALL_WRAPPERS if TILEGX
         select SYS_HYPERVISOR
         select ARCH_HAVE_NMI_SAFE_CMPXCHG
+ +      select GENERIC_CLOCKEVENTS
   
   # FIXME: investigate whether we need/want these options.
   #     select HAVE_IOREMAP_PROT
@@@ -48,6 -47,17 +48,14 @@@ config NEED_PER_CPU_PAGE_FIRST_CHUN
   config SYS_SUPPORTS_HUGETLBFS
         def_bool y
   
- -config GENERIC_CLOCKEVENTS
- -      def_bool y
- -
+ # Support for additional huge page sizes besides HPAGE_SIZE.
+ # The software support is currently only present in the TILE-Gx
+ # hypervisor. TILEPro in any case does not support page sizes
+ # larger than the default HPAGE_SIZE.
+ config HUGETLB_SUPER_PAGES
+       depends on HUGETLB_PAGE && TILEGX
+       def_bool y
+ 
   # FIXME: tilegx can implement a more efficient rwsem.
   config RWSEM_GENERIC_SPINLOCK
         def_bool y
@@@ -107,16 -117,14 +115,14 @@@ config HVC_TIL
         select HVC_DRIVER
         def_bool y
   
- # Please note: TILE-Gx support is not yet finalized; this is
- # the preliminary support.  TILE-Gx drivers are only provided
- # with the alpha or beta test versions for Tilera customers.
   config TILEGX
-       depends on EXPERIMENTAL
         bool "Building with TILE-Gx (64-bit) compiler and toolchain"
   
+ config TILEPRO
+       def_bool !TILEGX
+ 
   config 64BIT
-       depends on TILEGX
-       def_bool y
+       def_bool TILEGX
   
   config ARCH_DEFCONFIG
         string
@@@ -137,6 -145,33 +143,31 @@@ config NR_CPU
           smaller kernel memory footprint results from using a smaller
           value on chips with fewer tiles.
   
- -source "kernel/time/Kconfig"
- -
+ if TILEGX
+ 
+ choice
+       prompt "Kernel page size"
+       default PAGE_SIZE_64KB
+       help
+         This lets you select the page size of the kernel.  For best
+         performance on memory-intensive applications, a page size of 64KB
+         is recommended.  For workloads involving many small files, many
+         connections, etc., it may be better to select 16KB, which uses
+         memory more efficiently at some cost in TLB performance.
+ 
+         Note that this option is TILE-Gx specific; currently
+         TILEPro page size is set by rebuilding the hypervisor.
+ 
+ config PAGE_SIZE_16KB
+       bool "16KB"
+ 
+ config PAGE_SIZE_64KB
+       bool "64KB"
+ 
+ endchoice
+ 
+ endif
+ 
   source "kernel/Kconfig.hz"
   
   config KEXEC
diff --combined arch/tile/include/asm/processor.h

index 15cd8a4a06ce1dc62170444629edc7c17d9da2ab,e85a9af129685d5f7a5ce6383e4c2e2dcb09f88e..8c4dd9ff91eb91745e8918506375f2d4cb10d588
--- 1/arch/tile/include/asm/processor.h
--- 2/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@@ -76,6 -76,17 +76,17 @@@ struct async_tlb 
   
   #ifdef CONFIG_HARDWALL
   struct hardwall_info;
+ struct hardwall_task {
+       /* Which hardwall is this task tied to? (or NULL if none) */
+       struct hardwall_info *info;
+       /* Chains this task into the list at info->task_head. */
+       struct list_head list;
+ };
+ #ifdef __tilepro__
+ #define HARDWALL_TYPES 1   /* udn */
+ #else
+ #define HARDWALL_TYPES 3   /* udn, idn, and ipi */
+ #endif
   #endif
   
   struct thread_struct {
@@@ -116,10 -127,8 +127,8 @@@
         unsigned long dstream_pf;
   #endif
   #ifdef CONFIG_HARDWALL
-       /* Is this task tied to an activated hardwall? */
-       struct hardwall_info *hardwall;
-       /* Chains this task into the list at hardwall->list. */
-       struct list_head hardwall_list;
+       /* Hardwall information for various resources. */
+       struct hardwall_task hardwall[HARDWALL_TYPES];
   #endif
   #if CHIP_HAS_TILE_DMA()
         /* Async DMA TLB fault information */
@@@ -210,6 -219,9 +219,6 @@@ static inline void release_thread(struc
         /* Nothing for now */
   }
   
- -/* Prepare to copy thread state - unlazy all lazy status. */
- -#define prepare_to_copy(tsk)  do { } while (0)
- -
   extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
   
   extern int do_work_pending(struct pt_regs *regs, u32 flags);
diff --combined arch/tile/kernel/Makefile

index 0d826faf8f358c7a78fefb69e38af0d8927e3fad,f19116df2c62d3c11f6a77d2737a812f57eef1aa..5de99248d8df1a018aa200f48b249765690bc383
--- 1/arch/tile/kernel/Makefile
--- 2/arch/tile/kernel/Makefile
+++ b/arch/tile/kernel/Makefile
@@@ -3,16 -3,15 +3,15 @@@
   #
   
   extra-y := vmlinux.lds head_$(BITS).o
- -obj-y := backtrace.o entry.o init_task.o irq.o messaging.o \
+ +obj-y := backtrace.o entry.o irq.o messaging.o \
         pci-dma.o proc.o process.o ptrace.o reboot.o \
         setup.o signal.o single_step.o stack.o sys.o sysfs.o time.o traps.o \
         intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o
   
   obj-$(CONFIG_HARDWALL)                += hardwall.o
- obj-$(CONFIG_TILEGX)          += futex_64.o
   obj-$(CONFIG_COMPAT)          += compat.o compat_signal.o
   obj-$(CONFIG_SMP)             += smpboot.o smp.o tlb.o
   obj-$(CONFIG_MODULES)         += module.o
   obj-$(CONFIG_EARLY_PRINTK)    += early_printk.o
- obj-$(CONFIG_KEXEC)           += machine_kexec.o relocate_kernel.o
+ obj-$(CONFIG_KEXEC)           += machine_kexec.o relocate_kernel_$(BITS).o
   obj-$(CONFIG_PCI)             += pci.o
diff --combined arch/tile/kernel/process.c

index f572c19c4082db3f803fb9441cf5ff9a355fe832,03448eb189a72222288112cc47609e84af93be6d..ba1023d8a0218ca3a33918cf3a256a592a77fb5f
--- 1/arch/tile/kernel/process.c
--- 2/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@@ -114,10 -114,27 +114,10 @@@ void cpu_idle(void
         }
   }
   
- -struct thread_info *alloc_thread_info_node(struct task_struct *task, int node)
- -{
- -      struct page *page;
- -      gfp_t flags = GFP_KERNEL;
- -
- -#ifdef CONFIG_DEBUG_STACK_USAGE
- -      flags |= __GFP_ZERO;
- -#endif
- -
- -      page = alloc_pages_node(node, flags, THREAD_SIZE_ORDER);
- -      if (!page)
- -              return NULL;
- -
- -      return (struct thread_info *)page_address(page);
- -}
- -
   /*
- - * Free a thread_info node, and all of its derivative
- - * data structures.
+ + * Release a thread_info structure
    */
- -void free_thread_info(struct thread_info *info)
+ +void arch_release_thread_info(struct thread_info *info)
   {
         struct single_step_state *step_state = info->step_state;
   
@@@ -128,10 -145,10 +128,10 @@@
          * Calling deactivate here just frees up the data structures.
          * If the task we're freeing held the last reference to a
          * hardwall fd, it would have been released prior to this point
-        * anyway via exit_files(), and "hardwall" would be NULL by now.
+        * anyway via exit_files(), and the hardwall_task.info pointers
+        * would be NULL by now.
          */
-       if (info->task->thread.hardwall)
-               hardwall_deactivate(info->task);
+       hardwall_deactivate_all(info->task);
   #endif
   
         if (step_state) {
@@@ -152,6 -169,8 +152,6 @@@
                  */
                 kfree(step_state);
         }
- -
- -      free_pages((unsigned long)info, THREAD_SIZE_ORDER);
   }
   
   static void save_arch_state(struct thread_struct *t);
@@@ -245,7 -264,8 +245,8 @@@ int copy_thread(unsigned long clone_fla
   
   #ifdef CONFIG_HARDWALL
         /* New thread does not own any networks. */
-       p->thread.hardwall = NULL;
+       memset(&p->thread.hardwall[0], 0,
+              sizeof(struct hardwall_task) * HARDWALL_TYPES);
   #endif
   
   
@@@ -515,12 -535,7 +516,7 @@@ struct task_struct *__sched _switch_to(
   
   #ifdef CONFIG_HARDWALL
         /* Enable or disable access to the network registers appropriately. */
-       if (prev->thread.hardwall != NULL) {
-               if (next->thread.hardwall == NULL)
-                       restrict_network_mpls();
-       } else if (next->thread.hardwall != NULL) {
-               grant_network_mpls();
-       }
+       hardwall_switch_tasks(prev, next);
   #endif
   
         /*
diff --combined arch/tile/kernel/setup.c

index 98d80eb49ddbd912a0957b0dce0ad4d121be665a,445c220eae514ae1f3777579a8baea21aff6d576..6098ccc59be2484a22a5f11c862569d9850c4248
--- 1/arch/tile/kernel/setup.c
--- 2/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@@ -28,6 -28,7 +28,7 @@@
   #include <linux/highmem.h>
   #include <linux/smp.h>
   #include <linux/timex.h>
+ #include <linux/hugetlb.h>
   #include <asm/setup.h>
   #include <asm/sections.h>
   #include <asm/cacheflush.h>
@@@ -49,9 -50,6 +50,6 @@@ char chip_model[64] __write_once
   struct pglist_data node_data[MAX_NUMNODES] __read_mostly;
   EXPORT_SYMBOL(node_data);
   
- /* We only create bootmem data on node 0. */
- static bootmem_data_t __initdata node0_bdata;
- 
   /* Information on the NUMA nodes that we compute early */
   unsigned long __cpuinitdata node_start_pfn[MAX_NUMNODES];
   unsigned long __cpuinitdata node_end_pfn[MAX_NUMNODES];
@@@ -61,22 -59,6 +59,22 @@@ unsigned long __initdata node_free_pfn[
   
   static unsigned long __initdata node_percpu[MAX_NUMNODES];
   
+ +/*
+ + * per-CPU stack and boot info.
+ + */
+ +DEFINE_PER_CPU(unsigned long, boot_sp) =
+ +      (unsigned long)init_stack + THREAD_SIZE;
+ +
+ +#ifdef CONFIG_SMP
+ +DEFINE_PER_CPU(unsigned long, boot_pc) = (unsigned long)start_kernel;
+ +#else
+ +/*
+ + * The variable must be __initdata since it references __init code.
+ + * With CONFIG_SMP it is per-cpu data, which is exempt from validation.
+ + */
+ +unsigned long __initdata boot_pc = (unsigned long)start_kernel;
+ +#endif
+ +
   #ifdef CONFIG_HIGHMEM
   /* Page frame index of end of lowmem on each controller. */
   unsigned long __cpuinitdata node_lowmem_end_pfn[MAX_NUMNODES];
@@@ -534,37 -516,96 +532,96 @@@ static void __init setup_memory(void
   #endif
   }
   
- static void __init setup_bootmem_allocator(void)
+ /*
+  * On 32-bit machines, we only put bootmem on the low controller,
+  * since PAs > 4GB can't be used in bootmem.  In principle one could
+  * imagine, e.g., multiple 1 GB controllers all of which could support
+  * bootmem, but in practice using controllers this small isn't a
+  * particularly interesting scenario, so we just keep it simple and
+  * use only the first controller for bootmem on 32-bit machines.
+  */
+ static inline int node_has_bootmem(int nid)
   {
-       unsigned long bootmap_size, first_alloc_pfn, last_alloc_pfn;
+ #ifdef CONFIG_64BIT
+       return 1;
+ #else
+       return nid == 0;
+ #endif
+ }
   
-       /* Provide a node 0 bdata. */
-       NODE_DATA(0)->bdata = &node0_bdata;
+ static inline unsigned long alloc_bootmem_pfn(int nid,
+                                             unsigned long size,
+                                             unsigned long goal)
+ {
+       void *kva = __alloc_bootmem_node(NODE_DATA(nid), size,
+                                        PAGE_SIZE, goal);
+       unsigned long pfn = kaddr_to_pfn(kva);
+       BUG_ON(goal && PFN_PHYS(pfn) != goal);
+       return pfn;
+ }
   
- #ifdef CONFIG_PCI
-       /* Don't let boot memory alias the PCI region. */
-       last_alloc_pfn = min(max_low_pfn, pci_reserve_start_pfn);
+ static void __init setup_bootmem_allocator_node(int i)
+ {
+       unsigned long start, end, mapsize, mapstart;
+ 
+       if (node_has_bootmem(i)) {
+               NODE_DATA(i)->bdata = &bootmem_node_data[i];
+       } else {
+               /* Share controller zero's bdata for now. */
+               NODE_DATA(i)->bdata = &bootmem_node_data[0];
+               return;
+       }
+ 
+       /* Skip up to after the bss in node 0. */
+       start = (i == 0) ? min_low_pfn : node_start_pfn[i];
+ 
+       /* Only lowmem, if we're a HIGHMEM build. */
+ #ifdef CONFIG_HIGHMEM
+       end = node_lowmem_end_pfn[i];
   #else
-       last_alloc_pfn = max_low_pfn;
+       end = node_end_pfn[i];
   #endif
   
-       /*
-        * Initialize the boot-time allocator (with low memory only):
-        * The first argument says where to put the bitmap, and the
-        * second says where the end of allocatable memory is.
-        */
-       bootmap_size = init_bootmem(min_low_pfn, last_alloc_pfn);
+       /* No memory here. */
+       if (end == start)
+               return;
+ 
+       /* Figure out where the bootmem bitmap is located. */
+       mapsize = bootmem_bootmap_pages(end - start);
+       if (i == 0) {
+               /* Use some space right before the heap on node 0. */
+               mapstart = start;
+               start += mapsize;
+       } else {
+               /* Allocate bitmap on node 0 to avoid page table issues. */
+               mapstart = alloc_bootmem_pfn(0, PFN_PHYS(mapsize), 0);
+       }
   
+       /* Initialize a node. */
+       init_bootmem_node(NODE_DATA(i), mapstart, start, end);
+ 
+       /* Free all the space back into the allocator. */
+       free_bootmem(PFN_PHYS(start), PFN_PHYS(end - start));
+ 
+ #if defined(CONFIG_PCI)
         /*
-        * Let the bootmem allocator use all the space we've given it
-        * except for its own bitmap.
+        * Throw away any memory aliased by the PCI region.  FIXME: this
+        * is a temporary hack to work around bug 10502, and needs to be
+        * fixed properly.
          */
-       first_alloc_pfn = min_low_pfn + PFN_UP(bootmap_size);
-       if (first_alloc_pfn >= last_alloc_pfn)
-               early_panic("Not enough memory on controller 0 for bootmem\n");
+       if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start)
+               reserve_bootmem(PFN_PHYS(pci_reserve_start_pfn),
+                               PFN_PHYS(pci_reserve_end_pfn -
+                                        pci_reserve_start_pfn),
+                               BOOTMEM_EXCLUSIVE);
+ #endif
+ }
   
-       free_bootmem(PFN_PHYS(first_alloc_pfn),
-                    PFN_PHYS(last_alloc_pfn - first_alloc_pfn));
+ static void __init setup_bootmem_allocator(void)
+ {
+       int i;
+       for (i = 0; i < MAX_NUMNODES; ++i)
+               setup_bootmem_allocator_node(i);
   
   #ifdef CONFIG_KEXEC
         if (crashk_res.start != crashk_res.end)
@@@ -595,14 -636,6 +652,6 @@@ static int __init percpu_size(void
         return size;
   }
   
- static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal)
- {
-       void *kva = __alloc_bootmem(size, PAGE_SIZE, goal);
-       unsigned long pfn = kaddr_to_pfn(kva);
-       BUG_ON(goal && PFN_PHYS(pfn) != goal);
-       return pfn;
- }
- 
   static void __init zone_sizes_init(void)
   {
         unsigned long zones_size[MAX_NR_ZONES] = { 0 };
@@@ -640,21 -673,22 +689,22 @@@
                  * though, there'll be no lowmem, so we just alloc_bootmem
                  * the memmap.  There will be no percpu memory either.
                  */
-               if (__pfn_to_highbits(start) == 0) {
-                       /* In low PAs, allocate via bootmem. */
+               if (i != 0 && cpu_isset(i, isolnodes)) {
+                       node_memmap_pfn[i] =
+                               alloc_bootmem_pfn(0, memmap_size, 0);
+                       BUG_ON(node_percpu[i] != 0);
+               } else if (node_has_bootmem(start)) {
                         unsigned long goal = 0;
                         node_memmap_pfn[i] =
-                               alloc_bootmem_pfn(memmap_size, goal);
+                               alloc_bootmem_pfn(i, memmap_size, 0);
                         if (kdata_huge)
                                 goal = PFN_PHYS(lowmem_end) - node_percpu[i];
                         if (node_percpu[i])
                                 node_percpu_pfn[i] =
-                                   alloc_bootmem_pfn(node_percpu[i], goal);
-               } else if (cpu_isset(i, isolnodes)) {
-                       node_memmap_pfn[i] = alloc_bootmem_pfn(memmap_size, 0);
-                       BUG_ON(node_percpu[i] != 0);
+                                       alloc_bootmem_pfn(i, node_percpu[i],
+                                                         goal);
                 } else {
-                       /* In high PAs, just reserve some pages. */
+                       /* In non-bootmem zones, just reserve some pages. */
                         node_memmap_pfn[i] = node_free_pfn[i];
                         node_free_pfn[i] += PFN_UP(memmap_size);
                         if (!kdata_huge) {
@@@ -678,16 -712,9 +728,9 @@@
                 zones_size[ZONE_NORMAL] = end - start;
   #endif
   
-               /*
-                * Everyone shares node 0's bootmem allocator, but
-                * we use alloc_remap(), above, to put the actual
-                * struct page array on the individual controllers,
-                * which is most of the data that we actually care about.
-                * We can't place bootmem allocators on the other
-                * controllers since the bootmem allocator can only
-                * operate on 32-bit physical addresses.
-                */
-               NODE_DATA(i)->bdata = NODE_DATA(0)->bdata;
+               /* Take zone metadata from controller 0 if we're isolnode. */
+               if (node_isset(i, isolnodes))
+                       NODE_DATA(i)->bdata = &bootmem_node_data[0];
   
                 free_area_init_node(i, zones_size, start, NULL);
                 printk(KERN_DEBUG "  Normal zone: %ld per-cpu pages\n",
@@@ -870,6 -897,22 +913,22 @@@ subsys_initcall(topology_init)
   
   #endif /* CONFIG_NUMA */
   
+ /*
+  * Initialize hugepage support on this cpu.  We do this on all cores
+  * early in boot: before argument parsing for the boot cpu, and after
+  * argument parsing but before the init functions run on the secondaries.
+  * So the values we set up here in the hypervisor may be overridden on
+  * the boot cpu as arguments are parsed.
+  */
+ static __cpuinit void init_super_pages(void)
+ {
+ #ifdef CONFIG_HUGETLB_SUPER_PAGES
+       int i;
+       for (i = 0; i < HUGE_SHIFT_ENTRIES; ++i)
+               hv_set_pte_super_shift(i, huge_shift[i]);
+ #endif
+ }
+ 
   /**
    * setup_cpu() - Do all necessary per-cpu, tile-specific initialization.
    * @boot: Is this the boot cpu?
@@@ -924,6 -967,8 +983,8 @@@ void __cpuinit setup_cpu(int boot
         /* Reset the network state on this cpu. */
         reset_network_state();
   #endif
+ 
+       init_super_pages();
   }
   
   #ifdef CONFIG_BLK_DEV_INITRD
@@@ -1412,13 -1457,13 +1473,13 @@@ void __init setup_per_cpu_areas(void
                 for (i = 0; i < size; i += PAGE_SIZE, ++pfn, ++pg) {
   
                         /* Update the vmalloc mapping and page home. */
-                       pte_t *ptep =
-                               virt_to_pte(NULL, (unsigned long)ptr + i);
+                       unsigned long addr = (unsigned long)ptr + i;
+                       pte_t *ptep = virt_to_pte(NULL, addr);
                         pte_t pte = *ptep;
                         BUG_ON(pfn != pte_pfn(pte));
                         pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3);
                         pte = set_remote_cache_cpu(pte, cpu);
-                       set_pte(ptep, pte);
+                       set_pte_at(&init_mm, addr, ptep, pte);
   
                         /* Update the lowmem mapping for consistency. */
                         lowmem_va = (unsigned long)pfn_to_kaddr(pfn);
@@@ -1431,7 -1476,7 +1492,7 @@@
                                 BUG_ON(pte_huge(*ptep));
                         }
                         BUG_ON(pfn != pte_pfn(*ptep));
-                       set_pte(ptep, pte);
+                       set_pte_at(&init_mm, lowmem_va, ptep, pte);
                 }
         }
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 25 May 2012 22:59:38 +0000 (15:59 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 25 May 2012 22:59:38 +0000 (15:59 -0700)
		1	2
arch/tile/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/tile/include/asm/processor.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/tile/kernel/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
arch/tile/kernel/process.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/tile/kernel/setup.c	patch \|	diff1 \|	diff2 \|	blob \| history