Compare commits

...

3 Commits

Author SHA1 Message Date
Jiang, Fish 5a6a0dd7e1 vulkan: add INTEL_XE1 arch enum and enable coopmat1 on Intel Xe-LPG Plus (#24404)
* vulkan: add INTEL_PRE_XE2 arch enum and enable coopmat1 on Intel Xe-LPG Plus (1/3, Xe1-ARLH)

Co-authored-by: Xia, Jie <jie.xia@intel.com>
Co-authored-by: Liu, Russell <russell.liu@intel.com>

* Address comments of bf16 and trailing whitespace

* Rename INTEL_PRE_XE2 to INTEL_XE1 and remove driver workaround

* Add Windows driver check

---------

Co-authored-by: Xia, Jie <jie.xia@intel.com>
Co-authored-by: Liu, Russell <russell.liu@intel.com>
2026-06-26 13:26:22 +02:00
Sanjay Ahari ded1561b42 ui: fix accessibility for hover-gated interactive elements assisted by claude(in debugging and tests) (#24727) 2026-06-26 12:55:38 +02:00
Jeff Bolz 9df06805ee vulkan: Workaround compiler bug in conv2d coopmat2 path (#24924)
* vulkan: Workaround compiler bug in conv2d coopmat2 path

* apply same workaround to CONV_3D

* Apply suggestion from @jeffbolznv
2026-06-26 11:53:32 +02:00
7 changed files with 81 additions and 93 deletions
+18 -8
View File
@@ -308,6 +308,7 @@ enum vk_device_architecture {
AMD_RDNA1,
AMD_RDNA2,
AMD_RDNA3,
INTEL_XE1,
INTEL_XE2,
NVIDIA_PRE_TURING,
NVIDIA_TURING,
@@ -365,21 +366,26 @@ static vk_device_architecture get_device_architecture(const vk::PhysicalDevice&
const std::vector<vk::ExtensionProperties> ext_props = device.enumerateDeviceExtensionProperties();
bool subgroup_size_control = false;
bool integer_dot_product = false;
for (const auto& properties : ext_props) {
if (strcmp("VK_EXT_subgroup_size_control", properties.extensionName) == 0) {
subgroup_size_control = true;
} else if (strcmp("VK_KHR_shader_integer_dot_product", properties.extensionName) == 0) {
integer_dot_product = true;
}
}
if (!subgroup_size_control) {
if (!subgroup_size_control || !integer_dot_product) {
return vk_device_architecture::OTHER;
}
vk::PhysicalDeviceProperties2 props2;
vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT subgroup_size_control_props;
vk::PhysicalDeviceShaderIntegerDotProductPropertiesKHR integer_dot_props;
props2.pNext = &subgroup_size_control_props;
subgroup_size_control_props.pNext = &integer_dot_props;
device.getProperties2(&props2);
if (subgroup_size_control_props.minSubgroupSize == 16) {
@@ -388,6 +394,9 @@ static vk_device_architecture get_device_architecture(const vk::PhysicalDevice&
// https://www.intel.com/content/www/us/en/content-details/824434/2024-intel-tech-tour-xe2-and-lunar-lake-s-gpu.html
// https://www.intel.com/content/www/us/en/docs/oneapi/optimization-guide-gpu/2025-0/intel-xe-gpu-architecture.html
return vk_device_architecture::INTEL_XE2;
} else if (subgroup_size_control_props.minSubgroupSize == 8 &&
integer_dot_product && integer_dot_props.integerDotProduct4x8BitPackedSignedAccelerated) {
return vk_device_architecture::INTEL_XE1;
}
} else if (props.vendorID == VK_VENDOR_ID_NVIDIA) {
const std::vector<vk::ExtensionProperties> ext_props = device.enumerateDeviceExtensionProperties();
@@ -3837,7 +3846,7 @@ static void ggml_vk_load_shaders(vk_device& device, vk_pipeline requested) {
l_warptile = { 256, 128, 128, 16, subgroup_size_8, 64, 2, tm_m, tn_m, tk_m, subgroup_size_8 };
l_warptile_mmq = l_warptile_mmq_int = { 256, 128, 128, 32, subgroup_size_8, 64, 2, tm_m, tn_m, tk_m, subgroup_size_8 };
l_warptile_mmq_int_k = { 256, 128, 128, 32, subgroup_size_16, 64, 1, 4, 2, 1, subgroup_size_16 };
} else if (device->vendor_id == VK_VENDOR_ID_INTEL && device->coopmat_support && device->architecture == INTEL_XE2) {
} else if (device->vendor_id == VK_VENDOR_ID_INTEL && device->coopmat_support) {
// Xe2/Xe3 with coopmat enabled - warptile performance tuning
l_warptile = { 512, 128, 128, 16, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 };
l_warptile_mmq = { 512, 128, 128, 32, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 };
@@ -6361,9 +6370,8 @@ static vk_device ggml_vk_get_device(size_t idx) {
break;
case VK_VENDOR_ID_INTEL: {
// Current Windows driver does not expose BF16 support.
// We only want to use l_warptile if coopmat is available and is Xe2+
const bool xe2_with_coopmat = device->coopmat_support && device->architecture == INTEL_XE2;
const bool use_l_warptile = (i == GGML_TYPE_BF16) ? (device->coopmat_bf16_support && xe2_with_coopmat) : xe2_with_coopmat;
// We only want to use l_warptile if coopmat is available
const bool use_l_warptile = (i == GGML_TYPE_BF16) ? (device->coopmat_bf16_support && device->coopmat_support) : device->coopmat_support;
device->mul_mat_l[i] = use_l_warptile;
device->mul_mat_id_l[i] = use_l_warptile;
device->mul_mat_m[i] = true;
@@ -17890,9 +17898,9 @@ static bool ggml_vk_device_is_supported(const vk::PhysicalDevice & vkdev) {
static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDeviceProperties& props, const vk::PhysicalDeviceDriverProperties& driver_props, vk_device_architecture arch) {
switch (props.vendorID) {
case VK_VENDOR_ID_INTEL:
// Only allowing Xe2 GPU at the moment since Xe2 GPU can gain significant performance boost,
// while some older hardware (ex. Arc A770) has performance regressions
return arch == vk_device_architecture::INTEL_XE2;
// Only allowing Xe2/Xe3 GPU and integrated Xe GPUs at the moment since older hardware (ex. Arc A770) has performance regressions.
return (arch == vk_device_architecture::INTEL_XE2) ||
(arch == vk_device_architecture::INTEL_XE1 && props.deviceType == vk::PhysicalDeviceType::eIntegratedGpu && driver_props.driverID == vk::DriverId::eIntelProprietaryWindows);
case VK_VENDOR_ID_AMD:
if (driver_props.driverID == vk::DriverId::eAmdProprietary || driver_props.driverID == vk::DriverId::eAmdOpenSource) {
// Workaround for AMD proprietary driver reporting support on all GPUs
@@ -17940,6 +17948,8 @@ static uint32_t ggml_vk_intel_shader_core_count(const vk::PhysicalDevice& vkdev)
case 0xE20B: // B580
case 0xE211: // Pro B60
return 20;
case 0xB080: // PTL Xe3 LPG 2x6 (12 subslices)
return 12;
default:
return 0;
}
@@ -158,7 +158,7 @@ const uint32_t Csh_stride = BS_NPQ;
#ifdef COOPMAT
const uint32_t Csh_len = BS_K * Csh_stride;
#else
const uint32_t Csh_len = csh_store != 0 ? BS_K * Csh_stride : 1;
const uint32_t Csh_len = csh_store != 0 ? BS_K * Csh_stride : 8; // 8 to workaround compiler bug
#endif
shared SHMEM_TYPE Csh[Csh_len]; // K x NPQ
#endif
@@ -144,7 +144,7 @@ const uint32_t Csh_stride = BS_NPQ;
#ifdef COOPMAT
const uint32_t Csh_len = BS_K * Csh_stride;
#else
const uint32_t Csh_len = csh_store != 0 ? BS_K * Csh_stride : 1;
const uint32_t Csh_len = csh_store != 0 ? BS_K * Csh_stride : 8; // 8 to workaround compiler bug
#endif
shared SHMEM_TYPE Csh[Csh_len]; // K x NPQ
#endif
+3
View File
@@ -7973,6 +7973,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
}
}
}
for (auto kernel_type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
test_cases.emplace_back(new test_conv_2d({ 256, 256, 192, 1 }, { 3, 3, 192, 96 }, kernel_type, 1, 1, 1, 1, 1, 1, false));
}
// sycl backend will limit task global_range < MAX_INT
// test cases for 2D im2col with large input W and H (occurs in stable-diffusion)
@@ -33,7 +33,7 @@
{#if !readonly && onRemove}
<div
class="absolute top-10 right-2 flex items-center justify-center opacity-0 transition-opacity group-hover:opacity-100"
class="absolute top-10 right-2 flex items-center justify-center opacity-0 transition-opacity group-focus-within:opacity-100 group-hover:opacity-100"
>
<ActionIcon icon={X} tooltip="Remove" stopPropagationOnClick onclick={() => onRemove?.()} />
</div>
@@ -56,7 +56,7 @@
<div class="relative flex h-6 items-center justify-between">
<div class="right-0 flex items-center gap-2 opacity-100 transition-opacity">
<div
class="pointer-events-auto inset-0 flex items-center gap-1 opacity-0 transition-all duration-150 group-hover:opacity-100"
class="pointer-events-auto inset-0 flex items-center gap-1 opacity-0 transition-all duration-150 group-focus-within:opacity-100 group-hover:opacity-100"
>
<ActionIcon icon={Edit} tooltip="Edit" onclick={editCtx.handleEdit} />
<ActionIcon icon={Trash2} tooltip="Delete" onclick={onDelete} />
@@ -39,7 +39,6 @@
depth = 0
}: Props = $props();
let renderActionsDropdown = $state(false);
let dropdownOpen = $state(false);
let isLoading = $derived(getAllLoadingChats().includes(conversation.id));
@@ -71,26 +70,10 @@
}
}
function handleMouseLeave() {
if (!dropdownOpen) {
renderActionsDropdown = false;
}
}
function handleMouseOver() {
renderActionsDropdown = true;
}
function handleSelect() {
onSelect?.(conversation.id);
}
$effect(() => {
if (!dropdownOpen) {
renderActionsDropdown = false;
}
});
onMount(() => {
document.addEventListener('edit-active-conversation', handleGlobalEditEvent as EventListener);
@@ -103,23 +86,19 @@
});
</script>
<!-- svelte-ignore a11y_mouse_events_have_key_events -->
<button
class="group flex min-h-9 w-full cursor-pointer items-center justify-between space-x-3 rounded-lg py-1.5 text-left transition-colors hover:bg-foreground/10 {isActive
<div
class="conversation-item group relative flex min-h-9 w-full items-center justify-between space-x-3 rounded-lg py-1.5 transition-colors hover:bg-foreground/10 {isActive
? 'bg-foreground/5 text-accent-foreground'
: ''} px-3"
onclick={handleSelect}
onmouseover={handleMouseOver}
onmouseleave={handleMouseLeave}
onfocusin={handleMouseOver}
onfocusout={(e) => {
if (!e.currentTarget.contains(e.relatedTarget as Node | null)) {
handleMouseLeave();
}
}}
>
<button
class="absolute inset-0 z-0 cursor-pointer rounded-lg focus:outline-none focus-visible:ring-2 focus-visible:ring-ring"
onclick={handleSelect}
aria-label={conversation.name}
>
</button>
<div
class="flex min-w-0 flex-1 items-center gap-2"
class="pointer-events-none relative z-10 flex min-w-0 flex-1 items-center gap-2"
style:padding-left="{depth * FORK_TREE_DEPTH_PADDING}px"
>
{#if depth > 0}
@@ -130,7 +109,7 @@
<a
{...props}
href={RouterService.chat(conversation.forkedFromConversationId)}
class="flex shrink-0 items-center text-muted-foreground transition-colors hover:text-foreground"
class="pointer-events-auto flex shrink-0 items-center text-muted-foreground transition-colors hover:text-foreground"
>
<GitBranch class="h-3.5 w-3.5" />
</a>
@@ -146,18 +125,15 @@
{#if isLoading}
<Tooltip.Root>
<Tooltip.Trigger>
<div
class="stop-button flex h-4 w-4 shrink-0 cursor-pointer items-center justify-center rounded text-muted-foreground transition-colors hover:text-foreground"
<button
class="stop-button pointer-events-auto flex h-4 w-4 shrink-0 cursor-pointer items-center justify-center rounded text-muted-foreground transition-colors hover:text-foreground"
onclick={handleStop}
onkeydown={(e) => e.key === 'Enter' && handleStop(e)}
role="button"
tabindex="0"
aria-label="Stop generation"
>
<Loader2 class="loading-icon h-3.5 w-3.5 animate-spin" />
<Square class="stop-icon hidden h-3 w-3 fill-current text-destructive" />
</div>
</button>
</Tooltip.Trigger>
<Tooltip.Content>
@@ -169,52 +145,50 @@
<TruncatedText text={conversation.name} class="text-sm font-medium" showTooltip={false} />
</div>
{#if renderActionsDropdown}
<div class="actions flex items-center">
<DropdownMenuActions
triggerIcon={MoreHorizontal}
triggerTooltip="More actions"
bind:open={dropdownOpen}
actions={[
{
icon: conversation.pinned ? PinOff : Pin,
label: conversation.pinned ? 'Unpin' : 'Pin',
onclick: (e: Event) => {
e.stopPropagation();
handleTogglePin();
}
},
{
icon: Pencil,
label: 'Edit',
onclick: handleEdit,
shortcut: ['shift', 'cmd', 'e']
},
{
icon: Download,
label: 'Export',
onclick: (e: Event) => {
e.stopPropagation();
conversationsStore.downloadConversation(conversation.id);
},
shortcut: ['shift', 'cmd', 's']
},
{
icon: Trash2,
label: 'Delete',
onclick: handleDelete,
variant: 'destructive',
shortcut: ['shift', 'cmd', 'd'],
separator: true
<div class="actions pointer-events-auto relative z-20 flex items-center">
<DropdownMenuActions
triggerIcon={MoreHorizontal}
triggerTooltip="More actions"
bind:open={dropdownOpen}
actions={[
{
icon: conversation.pinned ? PinOff : Pin,
label: conversation.pinned ? 'Unpin' : 'Pin',
onclick: (e: Event) => {
e.stopPropagation();
handleTogglePin();
}
]}
/>
</div>
{/if}
</button>
},
{
icon: Pencil,
label: 'Edit',
onclick: handleEdit,
shortcut: ['shift', 'cmd', 'e']
},
{
icon: Download,
label: 'Export',
onclick: (e: Event) => {
e.stopPropagation();
conversationsStore.downloadConversation(conversation.id);
},
shortcut: ['shift', 'cmd', 's']
},
{
icon: Trash2,
label: 'Delete',
onclick: handleDelete,
variant: 'destructive',
shortcut: ['shift', 'cmd', 'd'],
separator: true
}
]}
/>
</div>
</div>
<style>
button {
.conversation-item {
:global([data-slot='dropdown-menu-trigger']:not([data-state='open'])) {
opacity: 0;
}
@@ -239,7 +213,8 @@
}
}
&:is(:hover) .stop-button {
&:is(:hover) .stop-button,
&:focus-within .stop-button {
:global(.stop-icon) {
display: block;
}