[LUA] Slug string for Vietnamese keyboards

Share your ideas & teach others
Post Reply
jesuisnk
Posts: 14
Joined: Mon May 06, 2024 11:06 pm

[LUA] Slug string for Vietnamese keyboards

Post by jesuisnk »

I found that the url.slug() function is really ineffective for strings typed on Vietnamese keyboards. Here is my code to fix that problem, for your reference:

LUA:

Code: Select all

-- Lua patterns operate on bytes, not characters, so a character class that
-- lists UTF-8 letters (e.g. '[àá]') matches each of their bytes separately
-- and corrupts the text. Instead, whole multi-byte sequences are matched with
-- the pattern below and translated through a lookup table.
--
-- The pattern matches one lead byte (0xC2-0xF4) followed by its continuation
-- bytes (0x80-0xBF). It is assembled with string.char so the file also loads
-- on Lua 5.1, which has no "\x" string escapes.
local UTF8_MULTIBYTE = '[' .. string.char(0xC2) .. '-' .. string.char(0xF4) .. ']'
    .. '[' .. string.char(0x80) .. '-' .. string.char(0xBF) .. ']+'

-- Maps each accented Vietnamese letter (both cases) to its ASCII base letter,
-- and each combining tone/shape mark to the empty string.
local VN_FOLD = {}
do
    -- Upper-case forms are listed explicitly because string.lower only
    -- lower-cases ASCII letters in the default "C" locale.
    local groups = {
        a = 'àáảãạăắằẵặẳâầấậẫẩÀÁẢÃẠĂẮẰẴẶẲÂẦẤẬẪẨ',
        e = 'èéẻẽẹêếềễệểÈÉẺẼẸÊẾỀỄỆỂ',
        i = 'ìíỉĩịÌÍỈĨỊ',
        o = 'òóỏõọôồốổỗộơờớởỡợÒÓỎÕỌÔỒỐỔỖỘƠỜỚỞỠỢ',
        u = 'ùúủũụưừứửữựÙÚỦŨỤƯỪỨỬỮỰ',
        y = 'ỳýỷỹỵỲÝỶỸỴ',
        d = 'đĐ',
    }
    for base, letters in pairs(groups) do
        for ch in letters:gmatch(UTF8_MULTIBYTE) do
            VN_FOLD[ch] = base
        end
    end

    -- Combining marks used by decomposed (NFD) Vietnamese text, given as the
    -- second UTF-8 byte after 0xCC: U+0300 grave, U+0301 acute,
    -- U+0302 circumflex, U+0303 tilde, U+0306 breve, U+0309 hook above,
    -- U+031B horn, U+0323 dot below. Dropping them leaves the base letter.
    for _, second in ipairs({ 0x80, 0x81, 0x82, 0x83, 0x86, 0x89, 0x9B, 0xA3 }) do
        VN_FOLD[string.char(0xCC, second)] = ''
    end
end

--- Convert text (possibly containing Vietnamese letters) to a URL slug.
-- Accented Vietnamese letters are folded to their ASCII base letter, the
-- result is lower-cased, every run of characters other than a-z and 0-9
-- becomes a single hyphen, and leading/trailing hyphens are removed.
-- @param text string to convert (expected to be UTF-8 encoded)
-- @return the slug as a single string value
function slugVN(text)
    if type(text) ~= 'string' then
        error('slugVN: expected a string, got ' .. type(text), 2)
    end

    -- Table replacement: sequences found in VN_FOLD are substituted, any
    -- other multi-byte sequence is left as-is and turned into a hyphen below.
    local slug = text:gsub(UTF8_MULTIBYTE, VN_FOLD)
    slug = slug:lower()

    -- Explicit ranges instead of %W so the result does not depend on locale.
    slug = slug:gsub('[^a-z0-9]+', '-')
    slug = slug:gsub('^%-', '') -- runs are already collapsed: at most one
    slug = slug:gsub('%-$', '')

    return slug
end
-- Usage example: slugify a sample Vietnamese phrase and print the result.
local sample = "Khoa Cơ Khí"
local result = slugVN(sample)
print(result)
JAVASCRIPT:

Code: Select all

/**
 * Build a URL slug from text that may contain Vietnamese letters.
 *
 * The text is lower-cased, accented Vietnamese letters are replaced by their
 * ASCII base letter, characters that are not word characters, whitespace or
 * hyphens are dropped, runs of whitespace/underscores/hyphens collapse into
 * one hyphen, and leading/trailing hyphens are trimmed.
 *
 * @param {string} text - Text to convert.
 * @returns {string} The slug.
 */
function slugVN(text) {
    // [pattern, replacement] pairs, applied in this order.
    const foldRules = [
        [/[àáảãạăắằẵặẳâầấậẫẩ]/g, 'a'],
        [/[èéẻẽẹêếềễệể]/g, 'e'],
        [/[ìíỉĩị]/g, 'i'],
        [/[òóỏõọôồốổỗộơờớởỡợ]/g, 'o'],
        [/[ùúủũụưừứửữự]/g, 'u'],
        [/[ỳýỷỹỵ]/g, 'y'],
        [/[đ]/g, 'd'],
        [/[\s]/g, ' ']
    ];

    const folded = foldRules.reduce(
        (acc, [pattern, ascii]) => acc.replace(pattern, ascii),
        text.toLowerCase()
    );

    return folded
        .replace(/[^\w\s-]/g, '')     // drop anything that is not a word char, whitespace or hyphen
        .replace(/[\s_-]+/g, '-')     // collapse whitespace, underscores and hyphens into one hyphen
        .replace(/^-+|-+$/g, '');     // trim hyphens at both ends
}
Post Reply