<!-- index.html -->

<!DOCTYPE html>
<!-- lang declares the page language for screen readers, translation tools and search engines -->
<html lang="en">
<head>
  <meta charset="utf-8">
  <!-- Meta tags for social media banners; these should be filled in appropriately as they are your "business card" -->
  <!-- Replace the content attribute with appropriate information -->
  <meta name="description" content="This study introduces a novel self-supervised pre-training approach using diffusion models for landmark detection in X-ray images, significantly outperforming existing methods across three datasets with minimal annotated data (1-50 images).">
  <!-- Keywords for your paper to be indexed by -->
  <meta name="keywords" content="Landmark detection, Diffusion models, DDPM, Self-supervised Learning, Few-shot Learning, X-rays">
  <meta name="viewport" content="width=device-width, initial-scale=1">


  <title>Diffusion models for Landmark detection</title>
  <link rel="icon" type="image/x-icon" href="static/images/favicon.ico">
  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
  rel="stylesheet">

  <!-- Stylesheets: Bulma and its extensions, icon fonts, then the page-specific overrides last -->
  <link rel="stylesheet" href="static/css/bulma.min.css">
  <link rel="stylesheet" href="static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="static/css/fontawesome.all.min.css">
  <link rel="stylesheet"
  href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="static/css/index.css">

  <!-- Scripts: order is kept as in the template; index.js is loaded after the libraries listed before it -->
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script src="https://documentcloud.adobe.com/view-sdk/main.js"></script>
  <script defer src="static/js/fontawesome.all.min.js"></script>
  <script src="static/js/bulma-carousel.min.js"></script>
  <script src="static/js/bulma-slider.min.js"></script>
  <script src="static/js/index.js"></script>
</head>
<body>


  <!-- Hero: paper title, authors, affiliation/venue and resource links -->
  <section class="hero">
    <div class="hero-body">
      <div class="container is-max-desktop">
        <div class="columns is-centered">
          <div class="column has-text-centered">
            <h1 class="title is-1 publication-title" style="font-size: 40px;">
              Self-supervised pre-training with diffusion model <br>for few-shot landmark
              detection in x-ray images</h1>

            <!-- Paper authors. Each Scholar URL needs both query parameters: user=<id>&hl=en -->
            <div class="is-size-5 publication-authors">
              <span class="author-block">
                <a href="https://scholar.google.com/citations?user=qGS6cv4AAAAJ&amp;hl=en" target="_blank">Roberto Di Via</a>,</span>
              <span class="author-block">
                <a href="https://scholar.google.com/citations?user=riK7DscAAAAJ&amp;hl=en" target="_blank">Francesca Odone</a>,</span>
              <span class="author-block">
                <a href="https://scholar.google.com/citations?user=-boYCXcAAAAJ&amp;hl=en" target="_blank">Vito Paolo Pastore</a>
              </span>
            </div>

            <!-- Affiliation and venue -->
            <div class="is-size-5 publication-authors">
              <span class="author-block">MaLGa Center, DIBRIS, University of Genoa<br>Winter Conference on Applications of Computer Vision (WACV) 2025</span>
              <!-- <span class="eql-cntrb"><small><br><sup>*</sup>Indicates Equal Contribution</small></span> -->
            </div>

            <div class="column has-text-centered">
              <div class="publication-links">
                <!-- Arxiv PDF link -->
                <span class="link-block">
                  <a href="https://arxiv.org/pdf/2407.18125" target="_blank"
                     class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <i class="fas fa-file-pdf"></i>
                    </span>
                    <span>Paper</span>
                  </a>
                </span>

                <!-- Supplementary PDF link -->
                <span class="link-block">
                  <a href="static/pdfs/SupplementaryMaterials.pdf" target="_blank"
                     class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <i class="fas fa-file-pdf"></i>
                    </span>
                    <span>Supplementary</span>
                  </a>
                </span>

                <!-- Github link -->
                <span class="link-block">
                  <a href="https://github.com/Malga-Vision/DiffusionXray-FewShot-LandmarkDetection" target="_blank"
                     class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <i class="fab fa-github"></i>
                    </span>
                    <span>Code</span>
                  </a>
                </span>

                <!-- ArXiv abstract link -->
                <span class="link-block">
                  <a href="https://arxiv.org/abs/2407.18125" target="_blank"
                     class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <i class="ai ai-arxiv"></i>
                    </span>
                    <span>arXiv</span>
                  </a>
                </span>
              </div>
            </div>
          </div>
        </div>
      </div>
    </div>
  </section>


<!-- Teaser image (the template's teaser video is kept below, commented out) -->
<section class="hero teaser">
  <div class="container is-max-desktop">
    <div class="hero-body">
      <!-- Use the standard <img> element; <image> is a non-standard spelling that parsers only tolerate as an error case -->
      <img src="static/images/heatmaps_wacv.png" alt="Our qualitative results" style="width:100%">
      <!--
      <video poster="" id="tree" autoplay controls muted loop height="100%">
        <source src="static/videos/banner_video.mp4"
        type="video/mp4">
      </video>
      <h2 class="subtitle has-text-centered">
        Aliquam vitae elit ullamcorper tellus egestas pellentesque. Ut lacus tellus, maximus vel lectus at, placerat pretium mi. Maecenas dignissim tincidunt vestibulum. Sed consequat hendrerit nisl ut maximus.
      </h2>
      -->
    </div>
  </div>
</section>
<!-- End teaser image -->

<!-- Paper abstract -->
<section class="section hero is-light">
  <div class="container is-max-desktop">
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Abstract</h2>
        <div class="content has-text-justified">
          <p>
            <b>Deep neural networks</b> have been extensively applied in the medical domain for various tasks, <b>including</b> image classification, segmentation, and <b>landmark detection</b>. However, their application is often
            <b>hindered by data scarcity</b>, both in terms of available annotations and images. This study introduces
            a novel application of denoising diffusion probabilistic models (DDPMs) to landmark detection
            task, specifically addressing the challenge of limited annotated data in x-ray imaging. Our key
            innovation lies in leveraging DDPMs for self-supervised pre-training in landmark detection, a
            previously unexplored approach in this domain. <b>This method enables accurate landmark detection
            with minimal annotated training data</b> (as few as 50 images), surpassing both ImageNet supervised
            pre-training and traditional self-supervised techniques across three popular x-ray benchmark datasets.
            To our knowledge, <b>this work represents the first application of diffusion models for self-supervised
            learning in landmark detection</b>, which may offer a valuable pre-training approach in few-shot regimes,
            for mitigating data scarcity.
          </p>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- End paper abstract -->


<!-- Introduction + pipeline figure -->
<section class="section hero is-small">
  <div class="container is-max-desktop">
    <!-- Introductory text -->
    <div class="columns is-centered has-text-centered">
      <div class="column">
        <h2 class="title is-3">Introduction</h2>
        <div class="content has-text-justified">
          <p>
            This paper introduces a novel application of denoising diffusion probabilistic models (DDPMs) for anatomical landmark detection in X-ray images, specifically <b>addressing the challenge of limited annotated data</b>. The key innovation lies in leveraging <b>DDPMs for self-supervised pre-training in landmark detection</b>, a previously unexplored approach in this domain. The method enables accurate landmark detection with minimal annotated training data (as few as 1-50 images), significantly <b>outperforming both ImageNet supervised pre-training and traditional self-supervised techniques</b> across three popular X-ray benchmark datasets (Chest, Cephalometric, and Hand). A comprehensive evaluation against state-of-the-art alternatives, <b>including YOLO</b>, demonstrates the approach's effectiveness even when pre-trained on one in-domain dataset and fine-tuned on smaller, distinct datasets.
          </p>
        </div>
      </div>
    </div>
    <!-- Pipeline overview figure -->
    <div class="columns is-centered">
      <div class="column">
        <img src="static/images/ddpm_pipeline.png" alt="Our pipeline" style="width:100%">
      </div>
    </div>
  </div>
</section>
<!-- End introduction + pipeline figure -->


<!-- Few-shot landmark detection results: summary text followed by one figure per dataset and the YOLO comparison -->
<section class="section hero is-light">
  <div class="container is-max-desktop">
    <div class="columns is-centered has-text-centered">
      <div class="column">
        <h2 class="title is-3">Few-shot Landmark Detection Results</h2>
        <div class="content has-text-justified">
          <p>
            The paper evaluates the effectiveness of DDPM self-supervised pre-training for landmark detection by <b>benchmarking it against supervised ImageNet pre-training, self-supervised state-of-the-art methods (MoCoV3, SimCLRV2, and DINO), and the YOLO framework</b> across three X-ray datasets: Chest, Cephalometric, and Hand. The proposed approach consistently outperforms both ImageNet and alternative SSL methods across all datasets and training image quantities, with particularly <b>impressive results</b> in low-data regimes. For instance, <b>with just one labeled sample</b> in the Chest dataset, DDPM achieves a Mean Radial Error (MRE) of 14.99px compared to ImageNet's 143.67px, representing an 89.6% improvement. Similar significant performance gains are observed in the Cephalometric dataset (15.71mm vs 86.71mm MRE) and Hand dataset (28.75mm vs 79.32mm MRE). When compared to YOLO, a state-of-the-art universal anatomical landmark detection model that uses mixed dataset training, DDPM achieves competitive or superior results using just one labeled sample. <b>These results demonstrate the method's effectiveness in few-shot learning scenarios, which are common in medical imaging where annotated data is scarce.</b>
          </p>
        </div>
      </div>
    </div>

    <!-- Chest dataset -->
    <div class="columns is-centered">
      <div class="column">
        <h3 class="title is-4">Chest Few-shot Results</h3>
        <img src="static/images/chest_label_efficient.png" alt="Chest few-shot results" style="width:100%">
      </div>
    </div>

    <!-- Cephalometric dataset -->
    <div class="columns is-centered">
      <div class="column">
        <h3 class="title is-4">Cephalometric Few-shot Results</h3>
        <img src="static/images/cephalo_label_efficient.png" alt="Cephalometric few-shot results" style="width:100%">
      </div>
    </div>

    <!-- Hand dataset -->
    <div class="columns is-centered">
      <div class="column">
        <h3 class="title is-4">Hand Few-shot Results</h3>
        <img src="static/images/hand_label_efficient.png" alt="Hand few-shot results" style="width:100%">
      </div>
    </div>

    <!-- Comparison with YOLO -->
    <div class="columns is-centered">
      <div class="column">
        <h3 class="title is-4">Comparison with the state-of-the-art YOLO framework</h3>
        <img src="static/images/sota_results.PNG" alt="Comparison with YOLO framework" style="width:100%">
      </div>
    </div>

  </div>
</section>
<!-- End few-shot landmark detection results -->


<!-- Image carousel -->
<!--
<section class="hero is-small">
  <div class="hero-body">
    <div class="container">
      <div id="results-carousel" class="carousel results-carousel">
       <div class="item">
        <img src="static/images/carousel1.jpg" alt="MY ALT TEXT"/>
        <h2 class="subtitle has-text-centered">
          First image description.
        </h2>
      </div>
      <div class="item">
        <img src="static/images/carousel2.jpg" alt="MY ALT TEXT"/>
        <h2 class="subtitle has-text-centered">
          Second image description.
        </h2>
      </div>
      <div class="item">
        <img src="static/images/carousel3.jpg" alt="MY ALT TEXT"/>
        <h2 class="subtitle has-text-centered">
         Third image description.
       </h2>
     </div>
     <div class="item">
      <img src="static/images/carousel4.jpg" alt="MY ALT TEXT"/>
      <h2 class="subtitle has-text-centered">
        Fourth image description.
      </h2>
    </div>
  </div>
</div>
</div>
</section>
-->
<!-- End image carousel -->


<!-- Youtube video -->
<!--
<section class="hero is-small is-light">
  <div class="hero-body">
    <div class="container">
      <h2 class="title is-3">Video Presentation</h2>
      <div class="columns is-centered has-text-centered">
        <div class="column is-four-fifths">
          
          <div class="publication-video">
            <iframe src="https://www.youtube.com/embed/JkaxUblCGz0" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>
          </div>
        </div>
      </div>
    </div>
  </div>
</section>
-->
<!-- End youtube video -->


<!-- Video carousel -->
<!--
<section class="hero is-small">
  <div class="hero-body">
    <div class="container">
      <h2 class="title is-3">Another Carousel</h2>
      <div id="results-carousel" class="carousel results-carousel">
        <div class="item item-video1">
          <video poster="" id="video1" autoplay controls muted loop height="100%">
            <source src="static/videos/carousel1.mp4"
            type="video/mp4">
          </video>
        </div>
        <div class="item item-video2">
          <video poster="" id="video2" autoplay controls muted loop height="100%">
            <source src="static/videos/carousel2.mp4"
            type="video/mp4">
          </video>
        </div>
        <div class="item item-video3">
          <video poster="" id="video3" autoplay controls muted loop height="100%">\
            <source src="static/videos/carousel3.mp4"
            type="video/mp4">
          </video>
        </div>
      </div>
    </div>
  </div>
</section>
-->
<!-- End video carousel -->


<!-- Paper poster -->
<!--
<section class="hero is-small is-light">
  <div class="hero-body">
    <div class="container">
      <h2 class="title">Poster</h2>

      <iframe  src="static/pdfs/sample.pdf" width="100%" height="550">
          </iframe>
        
      </div>
    </div>
  </section>
-->
<!--End paper poster -->


<!-- Citation section: a BibTeX entry followed by an APA-formatted reference.
     The id="BibTeX" on the section can be used as an in-page link target. -->
  <section class="section" id="BibTeX">
    <div class="container is-max-desktop content">
      <h2 class="title">BibTeX Citation</h2>
      <!-- NOTE(review): <pre> preserves whitespace by default, so the leading indentation
           of the lines below is part of the displayed (and copied) text. Confirm that
           index.css does not override this before re-indenting. -->
      <pre><code>
        @article{DiVia2024,
          author = {Di Via, R. and Odone, F. and Pastore, V. P.},
          title = {Self-supervised pre-training with diffusion model for few-shot landmark detection in x-ray images},
          year = {2024},
          journal = {arXiv},
          volume = {2407.18125},
          url = {https://arxiv.org/abs/2407.18125},
        }
      </code></pre>
      <h2 class="title">APA Citation</h2>
      <!-- Same citation as above, in APA style, as a single preformatted line -->
      <pre><code>
        Di Via, R., Odone, F., & Pastore, V. P. (2024). Self-supervised pre-training with diffusion model for few-shot landmark detection in x-ray images. ArXiv. https://arxiv.org/abs/2407.18125
      </code></pre>
    </div>
</section>
<!-- End citation section -->


  <!-- Footer: template attribution and content licence -->
  <footer class="footer">
    <div class="container">
      <div class="columns is-centered">
        <div class="column is-8">
          <div class="content">

            <p>
              This page was built using the <a href="https://github.com/eliahuhorwitz/Academic-project-page-template" target="_blank">Academic Project Page Template</a> which was adopted from the <a href="https://nerfies.github.io" target="_blank">Nerfies</a> project page.
              You are free to borrow the source code of this website, we just ask that you link back to this page in the footer. <br> This website is licensed under a <a rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/" target="_blank">Creative
              Commons Attribution-ShareAlike 4.0 International License</a>.
            </p>

          </div>
        </div>
      </div>
    </div>
  </footer>

<!-- Statcounter tracking code -->
  
<!-- You can add a tracker to track page visits by creating an account at statcounter.com -->

    <!-- End of Statcounter Code -->

  </body>
  </html>