

{"id":4327,"date":"2025-08-12T10:10:43","date_gmt":"2025-08-12T10:10:43","guid":{"rendered":"https:\/\/gretchen-ai.com\/?p=4327"},"modified":"2025-08-12T10:23:39","modified_gmt":"2025-08-12T10:23:39","slug":"bicrossmamba-st","status":"publish","type":"post","link":"https:\/\/gretchen-ai.com\/en\/bicrossmamba-st\/","title":{"rendered":"BiCrossMamba-ST: Speech Deepfake Detection with Bidirectional Mamba Spectro-Temporal Cross-Attention."},"content":{"rendered":"<div data-elementor-type=\"wp-post\" data-elementor-id=\"4327\" class=\"elementor elementor-4327\">\n\t\t\t\t<div class=\"elementor-element elementor-element-416c3dd e-flex e-con-boxed wpr-particle-no wpr-jarallax-no wpr-parallax-no wpr-sticky-section-no e-con e-parent\" data-id=\"416c3dd\" data-element_type=\"container\" data-e-type=\"container\">\n\t\t\t\t\t<div class=\"e-con-inner\">\n\t\t\t\t<div class=\"elementor-element elementor-element-6ba988b elementor-widget elementor-widget-spacer\" data-id=\"6ba988b\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"spacer.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<div class=\"elementor-spacer\">\n\t\t\t<div class=\"elementor-spacer-inner\"><\/div>\n\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t<div class=\"elementor-element elementor-element-365dfc3 e-flex e-con-boxed wpr-particle-no wpr-jarallax-no wpr-parallax-no wpr-sticky-section-no e-con e-parent\" data-id=\"365dfc3\" data-element_type=\"container\" data-e-type=\"container\">\n\t\t\t\t\t<div class=\"e-con-inner\">\n\t\t\t\t<div class=\"elementor-element elementor-element-2312f9b elementor-widget-divider--view-line elementor-widget elementor-widget-divider\" data-id=\"2312f9b\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"divider.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<div class=\"elementor-divider\">\n\t\t\t<span 
class=\"elementor-divider-separator\">\n\t\t\t\t\t\t<\/span>\n\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t<div class=\"elementor-element elementor-element-f863bd7 e-grid e-con-boxed wpr-particle-no wpr-jarallax-no wpr-parallax-no wpr-sticky-section-no e-con e-parent\" data-id=\"f863bd7\" data-element_type=\"container\" data-e-type=\"container\">\n\t\t\t\t\t<div class=\"e-con-inner\">\n\t\t<div class=\"elementor-element elementor-element-66ea544 e-con-full e-flex wpr-particle-no wpr-jarallax-no wpr-parallax-no wpr-sticky-section-no e-con e-child\" data-id=\"66ea544\" data-element_type=\"container\" data-e-type=\"container\">\n\t\t\t\t<div class=\"elementor-element elementor-element-f7d39d9 elementor-widget elementor-widget-button\" data-id=\"f7d39d9\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"button.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<div class=\"elementor-button-wrapper\">\n\t\t\t\t\t<a class=\"elementor-button elementor-button-link elementor-size-sm\" href=\"#\">\n\t\t\t\t\t\t<span class=\"elementor-button-content-wrapper\">\n\t\t\t\t\t\t\t\t\t<span class=\"elementor-button-text\">Study<\/span>\n\t\t\t\t\t<\/span>\n\t\t\t\t\t<\/a>\n\t\t\t\t<\/div>\n\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-6473bdd elementor-widget elementor-widget-text-editor\" data-id=\"6473bdd\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<h3>BiCrossMamba-ST<\/h3><h3><span style=\"font-size: 1.23529rem; font-style: inherit;\"> Speech Deepfake Detection with Bidirectional Mamba Spectro-Temporal Cross-Attention.<br \/><\/span><\/h3><p>by Yassine El Kheir<\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-5f3500c elementor-widget 
elementor-widget-image\" data-id=\"5f3500c\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"image.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<img decoding=\"async\" width=\"150\" height=\"150\" src=\"https:\/\/gretchen-ai.com\/wp-content\/uploads\/2025\/04\/Yassine-150x150.png\" class=\"attachment-thumbnail size-thumbnail wp-image-3979\" alt=\"\" srcset=\"https:\/\/gretchen-ai.com\/wp-content\/uploads\/2025\/04\/Yassine-150x150.png 150w, https:\/\/gretchen-ai.com\/wp-content\/uploads\/2025\/04\/Yassine-300x300.png 300w, https:\/\/gretchen-ai.com\/wp-content\/uploads\/2025\/04\/Yassine-1024x1024.png 1024w, https:\/\/gretchen-ai.com\/wp-content\/uploads\/2025\/04\/Yassine-768x768.png 768w, https:\/\/gretchen-ai.com\/wp-content\/uploads\/2025\/04\/Yassine-12x12.png 12w, https:\/\/gretchen-ai.com\/wp-content\/uploads\/2025\/04\/Yassine-1320x1320.png 1320w, https:\/\/gretchen-ai.com\/wp-content\/uploads\/2025\/04\/Yassine.png 1511w\" sizes=\"(max-width: 150px) 100vw, 150px\" \/>\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-7444a8e elementor-widget elementor-widget-image\" data-id=\"7444a8e\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"image.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<img fetchpriority=\"high\" decoding=\"async\" width=\"1902\" height=\"2560\" src=\"https:\/\/gretchen-ai.com\/wp-content\/uploads\/2025\/08\/speech-waves-scaled.jpg\" class=\"attachment-full size-full wp-image-4190\" alt=\"\" srcset=\"https:\/\/gretchen-ai.com\/wp-content\/uploads\/2025\/08\/speech-waves-scaled.jpg 1902w, https:\/\/gretchen-ai.com\/wp-content\/uploads\/2025\/08\/speech-waves-223x300.jpg 223w, https:\/\/gretchen-ai.com\/wp-content\/uploads\/2025\/08\/speech-waves-761x1024.jpg 761w, 
https:\/\/gretchen-ai.com\/wp-content\/uploads\/2025\/08\/speech-waves-768x1034.jpg 768w, https:\/\/gretchen-ai.com\/wp-content\/uploads\/2025\/08\/speech-waves-1141x1536.jpg 1141w, https:\/\/gretchen-ai.com\/wp-content\/uploads\/2025\/08\/speech-waves-1522x2048.jpg 1522w, https:\/\/gretchen-ai.com\/wp-content\/uploads\/2025\/08\/speech-waves-9x12.jpg 9w, https:\/\/gretchen-ai.com\/wp-content\/uploads\/2025\/08\/speech-waves-1320x1777.jpg 1320w\" sizes=\"(max-width: 1902px) 100vw, 1902px\" \/>\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t<div class=\"elementor-element elementor-element-97ba46e e-flex e-con-boxed wpr-particle-no wpr-jarallax-no wpr-parallax-no wpr-sticky-section-no e-con e-parent\" data-id=\"97ba46e\" data-element_type=\"container\" data-e-type=\"container\">\n\t\t\t\t\t<div class=\"e-con-inner\">\n\t\t\t\t<div class=\"elementor-element elementor-element-3a7e768 elementor-widget elementor-widget-text-editor\" data-id=\"3a7e768\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<div>Voice synthesis technology has reached a point where AI can create convincing imitations of anyone&#8217;s voice from just a few minutes of audio. 
This poses serious risks to:<\/div><div>\u00a0<\/div><ul><li><strong>Banking systems<\/strong>\u00a0that rely on voice authentication<\/li><li><strong>Legal proceedings<\/strong>\u00a0where audio evidence is crucial<\/li><li><strong>Personal security<\/strong>\u00a0against impersonation attacks<\/li><\/ul><div>and many more<\/div><div>\u00a0<\/div><div>Traditional detection methods are struggling to keep pace with these rapidly evolving deepfake techniques.<\/div><div>\u00a0<\/div><div><strong>Yassine El Kheir<\/strong>\u00a0and other researchers from Speech and Language Technology, DFKI, Germany, and the Technical University of Berlin have developed BiCrossMamba-ST, a novel detection system that outperforms existing methods by substantial margins.<\/div><div>\u00a0<\/div><div>The breakthrough lies in its dual-perspective analysis: By processing spectral sub-bands and temporal intervals separately and then integrating their representations, BiCrossMamba-ST effectively captures the subtle cues of synthetic speech.<\/div><div>\u00a0<\/div><div>The performance improvements are striking:<\/div><div>\u00a0<\/div><ul><li><strong>28.2%<\/strong>\u00a0fewer parameters while maintaining superior accuracy<\/li><li><strong>67.74%<\/strong>\u00a0and\u00a0<strong>26.3%<\/strong>\u00a0improvement over other state-of-the-art models like AASIST on the ASVspoof19 and ASVspoofDF21 benchmark datasets, respectively<\/li><\/ul>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t<div class=\"elementor-element elementor-element-99e572c e-flex e-con-boxed wpr-particle-no wpr-jarallax-no wpr-parallax-no wpr-sticky-section-no e-con e-parent\" data-id=\"99e572c\" data-element_type=\"container\" data-e-type=\"container\">\n\t\t\t\t\t<div class=\"e-con-inner\">\n\t\t\t\t<div class=\"elementor-element elementor-element-231b545 elementor-widget 
elementor-widget-spacer\" data-id=\"231b545\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"spacer.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<div class=\"elementor-spacer\">\n\t\t\t<div class=\"elementor-spacer-inner\"><\/div>\n\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<\/div>","protected":false},"excerpt":{"rendered":"<p>Study BiCrossMamba-ST Speech Deepfake Detection with Bidirectional Mamba Spectro-Temporal Cross-Attention. by Yassine El Kheir Voice synthesis technology has reached a point where AI can create convincing imitations of anyone&#8217;s voice from just a few minutes of audio. This poses serious risks to:\u00a0 Banking systems\u00a0that rely on voice authentication Legal proceedings\u00a0where audio evidence is crucial Personal security\u00a0against impersonation attacks and many more\u00a0Traditional detection methods are struggling to keep pace with these rapidly evolving deepfake techniques.\u00a0Yassine El Kheir\u00a0and other researchers from Speech and Language Technology, DFKI, Germany, and the Technical University of Berlin have developed BiCrossMamba-ST, a novel detection system that outperforms existing methods by substantial margins.\u00a0The breakthrough lies in its dual-perspective analysis: By processing spectral sub-bands and temporal intervals separately and then integrating their representations, BiCrossMamba-ST effectively captures the subtle cues of synthetic speech.\u00a0The performance improvements are striking:\u00a0 28.2%\u00a0fewer parameters while maintaining superior accuracy 67.74%\u00a0and\u00a026.3%\u00a0improvement over other state-of-the-art models like AASIST on the ASVspoof19 and ASVspoofDF21 benchmark datasets, 
respectively<\/p>","protected":false},"author":2,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"_monsterinsights_skip_tracking":false,"_monsterinsights_sitenote_active":false,"_monsterinsights_sitenote_note":"","_monsterinsights_sitenote_category":0,"site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"default","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"set","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"footnotes":""},"categories":[1],"tags":[],"class_list":["post-4327","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"aioseo_notices":[],"_links":{"self":[{"href":"https:\/\/gretchen-ai.com\/en\/wp-json\/wp\/v2\/posts\/4327","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/gretchen-ai.com\/en\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/gretchen-ai.com\/en\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/gretchen-ai.com\/en\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/gretchen-ai.com\/en\/wp-json\/wp\/v2\/comments?post=4327"}],"version-history":[{"count":16,"href":"https:\/\/gretchen-ai.com\/en\/wp-json\/wp\/v2\/posts\/4327\/revisions"}],"predecessor-version":[{"id":4343,"href":"https:\/\/gretchen-ai.com\/en\/wp-json\/wp\/v2\/posts\/4327\/revisions\/4343"}],"wp:attachment":[{"href":"https:\/\/gretchen-ai.com\/en\/wp-json\/wp\/v2\/media?parent=4327"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/gretchen-ai.com\/en\/wp-json\/wp\/v2\/categories?post=4327"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/gretchen-ai.com\/en\/wp-json\/wp\/v2\/tags?post=4327"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}