import cv2
import numpy as np
import gradio as gr
from modules.xfeat import XFeat
from utils import visualize_matches

HEADER = """
<div align="center">
<p>
<span style="font-size: 30px; vertical-align: bottom;"> XFeat: Accelerated Features for Lightweight Image Matching</span>
</p>
<p style="margin-top: -15px;">
<a href="https://arxiv.org/abs/2404.19174" target="_blank" style="color: grey;">ArXiv Paper</a>
&nbsp;
<a href="https://github.com/verlab/accelerated_features" target="_blank" style="color: grey;">GitHub Repository</a>
</p>
<p>
Upload two images 🖼️ of the object and identify matches between them 🚀
</p>
</div>
"""
ABSTRACT = """
We introduce a lightweight and accurate architecture for resource-efficient visual correspondence. Our method, dubbed XFeat (Accelerated Features), revisits fundamental design choices in convolutional neural networks for detecting, extracting, and matching local features. Our new model satisfies a critical need for fast and robust algorithms suitable to resource-limited devices. In particular, accurate image matching requires sufficiently large image resolutions -- for this reason, we keep the resolution as large as possible while limiting the number of channels in the network. Besides, our model is designed to offer the choice of matching at the sparse or semi-dense levels, each of which may be more suitable for different downstream applications, such as visual navigation and augmented reality. Our model is the first to offer semi-dense matching efficiently, leveraging a novel match refinement module that relies on coarse local descriptors. XFeat is versatile and hardware-independent, surpassing current deep learning-based local features in speed (up to 5x faster) with comparable or better accuracy, proven in pose estimation and visual localization. We showcase it running in real-time on an inexpensive laptop CPU without specialized hardware optimizations.
"""

# Instantiate XFeat once at import time so repeated requests reuse the loaded weights.
xfeat = XFeat(weights="weights/xfeat.pt", top_k=4096)


def find_matches(image_0, image_1):
    # Gradio supplies RGB arrays; OpenCV and XFeat's helpers expect BGR.
    image_0_bgr = cv2.cvtColor(image_0, cv2.COLOR_RGB2BGR)
    image_1_bgr = cv2.cvtColor(image_1, cv2.COLOR_RGB2BGR)

    # Use the out-of-the-box function for extraction + MNN matching.
    match_kp0, match_kp1 = xfeat.match_xfeat(image_0_bgr, image_1_bgr, top_k=4096)

    # Keep only matches consistent with a robust homography fit (MAGSAC++);
    # the returned mask flags the inliers.
    _, mask = cv2.findHomography(
        match_kp0, match_kp1, cv2.USAC_MAGSAC, 3.5, maxIters=1_000, confidence=0.999
    )
    keep = mask.flatten().astype(bool)
    match_kp0 = match_kp0[keep]
    match_kp1 = match_kp1[keep]
    num_filtered_matches = len(match_kp0)

    # Identity match matrix: keypoint i in image 0 pairs with keypoint i in image 1.
    viz = visualize_matches(
        image_0,
        image_1,
        match_kp0,
        match_kp1,
        np.eye(num_filtered_matches),
        show_keypoints=True,
        highlight_unmatched=True,
        title=f"{num_filtered_matches} matches",
        line_width=2,
    )
    return viz
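

# Semi-dense alternative (sketch, not wired into the UI): the upstream XFeat class
# also exposes `match_xfeat_star`, the semi-dense matching mode described in the
# paper. This helper assumes the same RGB inputs as `find_matches`; the top_k
# value is an illustrative choice, not one taken from this app.
def find_matches_semidense(image_0, image_1):
    image_0_bgr = cv2.cvtColor(image_0, cv2.COLOR_RGB2BGR)
    image_1_bgr = cv2.cvtColor(image_1, cv2.COLOR_RGB2BGR)
    mkpts_0, mkpts_1 = xfeat.match_xfeat_star(image_0_bgr, image_1_bgr, top_k=8000)
    return mkpts_0, mkpts_1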

with gr.Blocks() as demo:
    gr.Markdown(HEADER)

    with gr.Accordion("Abstract (click to open)", open=False):
        gr.Image("assets/xfeat_arq.png")
        gr.Markdown(ABSTRACT)

    with gr.Row():
        image_1 = gr.Image()
        image_2 = gr.Image()

    with gr.Row():
        button = gr.Button(value="Find Matches")
        clear = gr.ClearButton(value="Clear")

    output = gr.Image()

    button.click(find_matches, [image_1, image_2], output)
    clear.add([image_1, image_2, output])

    gr.Examples(
        examples=[
            ["assets/ref.png", "assets/tgt.png"],
            ["assets/demo1.jpg", "assets/demo2.jpg"],
            ["assets/tower-1.webp", "assets/tower-2.jpeg"],
        ],
        inputs=[image_1, image_2],
        outputs=[output],
        fn=find_matches,
        cache_examples=None,
    )

if __name__ == "__main__":
    demo.launch()
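
# Quick local check (sketch): exercise `find_matches` on the bundled example pair
# without launching the UI. Uncomment to run; the output filename is arbitrary,
# and the final conversion assumes `visualize_matches` returns an RGB canvas.
#
#   img0 = cv2.cvtColor(cv2.imread("assets/ref.png"), cv2.COLOR_BGR2RGB)
#   img1 = cv2.cvtColor(cv2.imread("assets/tgt.png"), cv2.COLOR_BGR2RGB)
#   canvas = find_matches(img0, img1)
#   cv2.imwrite("matches.png", cv2.cvtColor(canvas, cv2.COLOR_RGB2BGR))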