Source code for pythainlp.util.lcs

# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: 2016-2025 PyThaiNLP Project
# SPDX-FileType: SOURCE
# SPDX-License-Identifier: Apache-2.0

[docs] def longest_common_subsequence(str1: str, str2: str) -> str: """ Find the longest common subsequence between two strings. :param str str1: The first string. :param str str2: The second string. :return: The longest common subsequence. :rtype: str :Example: :: from pythainlp.util.lcs import longest_common_subsequence print(longest_common_subsequence("ABCBDAB", "BDCAB")) # output: "BDAB" """ m = len(str1) n = len(str2) # Create a 2D array to store lengths of longest common subsequence. dp = [[0] * (n + 1) for _ in range(m + 1)] # Build the dp array from bottom up. for i in range(m + 1): for j in range(n + 1): if i == 0 or j == 0: dp[i][j] = 0 elif str1[i - 1] == str2[j - 1]: dp[i][j] = dp[i - 1][j - 1] + 1 else: dp[i][j] = max(dp[i - 1][j], dp[i][j - 1]) # Following code is used to print LCS index = dp[m][n] # Create a character array to store the lcs string lcs = [""] * (index + 1) lcs[index] = "" # Start from the right-most-bottom-most corner and # one by one store characters in lcs[] i = m j = n while i > 0 and j > 0: # If current character in str1 and str2 are same, then # current character is part of LCS if str1[i - 1] == str2[j - 1]: lcs[index - 1] = str1[i - 1] i -= 1 j -= 1 index -= 1 # If not same, then find the larger of two and # go in the direction of larger value elif dp[i - 1][j] > dp[i][j - 1]: i -= 1 else: j -= 1 return "".join(lcs)