# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: 2016-2025 PyThaiNLP Project
# SPDX-FileType: SOURCE
# SPDX-License-Identifier: Apache-2.0
[docs]
def longest_common_subsequence(str1: str, str2: str) -> str:
"""
Find the longest common subsequence between two strings.
:param str str1: The first string.
:param str str2: The second string.
:return: The longest common subsequence.
:rtype: str
:Example:
::
from pythainlp.util.lcs import longest_common_subsequence
print(longest_common_subsequence("ABCBDAB", "BDCAB"))
# output: "BDAB"
"""
m = len(str1)
n = len(str2)
# Create a 2D array to store lengths of longest common subsequence.
dp = [[0] * (n + 1) for _ in range(m + 1)]
# Build the dp array from bottom up.
for i in range(m + 1):
for j in range(n + 1):
if i == 0 or j == 0:
dp[i][j] = 0
elif str1[i - 1] == str2[j - 1]:
dp[i][j] = dp[i - 1][j - 1] + 1
else:
dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
# Following code is used to print LCS
index = dp[m][n]
# Create a character array to store the lcs string
lcs = [""] * (index + 1)
lcs[index] = ""
# Start from the right-most-bottom-most corner and
# one by one store characters in lcs[]
i = m
j = n
while i > 0 and j > 0:
# If current character in str1 and str2 are same, then
# current character is part of LCS
if str1[i - 1] == str2[j - 1]:
lcs[index - 1] = str1[i - 1]
i -= 1
j -= 1
index -= 1
# If not same, then find the larger of two and
# go in the direction of larger value
elif dp[i - 1][j] > dp[i][j - 1]:
i -= 1
else:
j -= 1
return "".join(lcs)