Index: kernel/arch/sparc64/src/smp/ipi.c
===================================================================
--- kernel/arch/sparc64/src/smp/ipi.c	(revision f4c2b6a104d4ab9ce057413b12ff1efedcba0fcd)
+++ kernel/arch/sparc64/src/smp/ipi.c	(revision 20eb5e4d3d9057eaf8e976463ec55ff13ff5a8e3)
@@ -47,4 +47,31 @@
 #include <panic.h>
 
+/** Set the contents of the outgoing interrupt vector data.
+ *
+ * Data item 0 is set to func and the rest is zeroed: three items are
+ * written if US is defined, eight if US3 is, none if neither is defined.
+ *
+ * This is a helper function used from within the cross_call function.
+ *
+ * @param func value the first data item of the vector will be set to
+ */
+static inline void set_intr_w_data(void (* func)(void))
+{
+#if defined (US)
+	asi_u64_write(ASI_INTR_W, ASI_UDB_INTR_W_DATA_0, (uintptr_t) func);
+	asi_u64_write(ASI_INTR_W, ASI_UDB_INTR_W_DATA_1, 0);
+	asi_u64_write(ASI_INTR_W, ASI_UDB_INTR_W_DATA_2, 0);
+#elif defined (US3)
+	asi_u64_write(ASI_INTR_W, VA_INTR_W_DATA_0, (uintptr_t)	func);
+	asi_u64_write(ASI_INTR_W, VA_INTR_W_DATA_1, 0);
+	asi_u64_write(ASI_INTR_W, VA_INTR_W_DATA_2, 0);
+	asi_u64_write(ASI_INTR_W, VA_INTR_W_DATA_3, 0);
+	asi_u64_write(ASI_INTR_W, VA_INTR_W_DATA_4, 0);
+	asi_u64_write(ASI_INTR_W, VA_INTR_W_DATA_5, 0);
+	asi_u64_write(ASI_INTR_W, VA_INTR_W_DATA_6, 0);
+	asi_u64_write(ASI_INTR_W, VA_INTR_W_DATA_7, 0);
+#endif /* US / US3 */
+}
+
 /** Invoke function on another processor.
  *
@@ -74,12 +101,11 @@
 		panic("Interrupt Dispatch Status busy bit set\n");
 	
+	ASSERT(!(pstate_read() & PSTATE_IE_BIT));
+	
 	do {
-		asi_u64_write(ASI_UDB_INTR_W, ASI_UDB_INTR_W_DATA_0,
-		    (uintptr_t)	func);
-		asi_u64_write(ASI_UDB_INTR_W, ASI_UDB_INTR_W_DATA_1, 0);
-		asi_u64_write(ASI_UDB_INTR_W, ASI_UDB_INTR_W_DATA_2, 0);
-		asi_u64_write(ASI_UDB_INTR_W,
+		set_intr_w_data(func);
+		asi_u64_write(ASI_INTR_W,
 		    (mid << INTR_VEC_DISPATCH_MID_SHIFT) |
-		    ASI_UDB_INTR_W_DISPATCH, 0);
+		    VA_INTR_W_DISPATCH, 0);
 	
 		membar();
Index: kernel/arch/sparc64/src/smp/smp.c
===================================================================
--- kernel/arch/sparc64/src/smp/smp.c	(revision f4c2b6a104d4ab9ce057413b12ff1efedcba0fcd)
+++ kernel/arch/sparc64/src/smp/smp.c	(revision 20eb5e4d3d9057eaf8e976463ec55ff13ff5a8e3)
@@ -36,4 +36,5 @@
 #include <genarch/ofw/ofw_tree.h>
 #include <cpu.h>
+#include <arch/cpu_family.h>
 #include <arch/cpu.h>
 #include <arch.h>
@@ -44,4 +45,5 @@
 #include <synch/waitq.h>
 #include <print.h>
+#include <arch/cpu_node.h>
 
 /**
@@ -62,11 +64,51 @@
 	count_t cnt = 0;
 	
-	node = ofw_tree_find_child_by_device_type(ofw_tree_lookup("/"), "cpu");
-	while (node) {
-		cnt++;
-		node = ofw_tree_find_peer_by_device_type(node, "cpu");
+	if (is_us() || is_us_iii()) {
+		node = ofw_tree_find_child_by_device_type(cpus_parent(), "cpu");
+		while (node) {
+			cnt++;
+			node = ofw_tree_find_peer_by_device_type(node, "cpu");
+		}
+	} else if (is_us_iv()) {
+		node = ofw_tree_find_child(cpus_parent(), "cmp");
+		while (node) {
+			cnt += 2;
+			node = ofw_tree_find_peer_by_name(node, "cmp");
+		}
 	}
 	
 	config.cpu_count = max(1, cnt);
+}
+
+/**
+ * Wake up the CPU represented by the "node" OFW tree node. Nothing happens
+ * if "node" is NULL, lacks an ID property or represents the current CPU.
+ */
+static void wakeup_cpu(ofw_tree_node_t *node)
+{
+	uint32_t mid;
+	ofw_tree_property_t *prop;
+
+	if (!node)
+		return;
+
+	/* 'upa-portid' for US, 'portid' for US-III, 'cpuid' for US-IV */
+	prop = ofw_tree_getprop(node, "upa-portid");
+	if ((!prop) || (!prop->value))
+		prop = ofw_tree_getprop(node, "portid");
+	if ((!prop) || (!prop->value))
+		prop = ofw_tree_getprop(node, "cpuid");
+	if ((!prop) || (!prop->value))
+		return;
+
+	mid = *((uint32_t *) prop->value);
+	if (CPU->arch.mid == mid)
+		return;
+
+	waking_up_mid = mid;
+	if (waitq_sleep_timeout(&ap_completion_wq, 1000000, SYNCH_FLAGS_NONE) ==
+	    ESYNCH_TIMEOUT)
+		printf("%s: waiting for processor (mid = %" PRIu32
+		    ") timed out\n", __func__, mid);
 }
 
@@ -77,29 +119,16 @@
 	int i;
 	
-	node = ofw_tree_find_child_by_device_type(ofw_tree_lookup("/"), "cpu");
-	for (i = 0; node; node = ofw_tree_find_peer_by_device_type(node, "cpu"), i++) {
-		uint32_t mid;
-		ofw_tree_property_t *prop;
-		
-		prop = ofw_tree_getprop(node, "upa-portid");
-		if (!prop || !prop->value)
-			continue;
-		
-		mid = *((uint32_t *) prop->value);
-		if (CPU->arch.mid == mid) {
-			/*
-			 * Skip the current CPU.
-			 */
-			continue;
+	if (is_us() || is_us_iii()) {
+		node = ofw_tree_find_child_by_device_type(cpus_parent(), "cpu");
+		for (i = 0; node;
+                     node = ofw_tree_find_peer_by_device_type(node, "cpu"), i++)
+			wakeup_cpu(node);
+	} else if (is_us_iv()) {
+		node = ofw_tree_find_child(cpus_parent(), "cmp");
+		while (node) {
+			wakeup_cpu(ofw_tree_find_child(node, "cpu@0"));
+			wakeup_cpu(ofw_tree_find_child(node, "cpu@1"));
+			node = ofw_tree_find_peer_by_name(node, "cmp");
 		}
-
-		/*
-		 * Processor with ID == mid can proceed with its initialization.
-		 */
-		waking_up_mid = mid;
-		
-		if (waitq_sleep_timeout(&ap_completion_wq, 1000000, SYNCH_FLAGS_NONE) == ESYNCH_TIMEOUT)
-			printf("%s: waiting for processor (mid = %" PRIu32 ") timed out\n",
-			    __func__, mid);
 	}
 }
