Too often I want to use a regular expression to solve a simple string problem and have to dive into regexes all over again. And most of the time I basically want the same thing: to find or replace a piece of text within a line of text. Because I don’t use regular expressions on a daily basis, it always takes some time and Googling before I have my answer. So today I finally decided to write a little wrapper for it so I can reuse it whenever I want. This will save me a lot of time in the future, that’s for sure!
So here’s the code; perhaps you can use it to your advantage. :-) Any feedback is welcome (because most likely it can be improved…).
The goal here is to get a wrapper method that can be used to find a particular string in a piece of text by means of a wildcard search. Since TDD is a good way to show what I want, I will first give you 3 test methods:
/// <summary>
/// Verifies that IsPatternMatch reports whether a wildcard pattern occurs in a text,
/// both with the default case-insensitive search and with a case-sensitive search.
/// </summary>
[TestMethod]
public void TestContainsPattern()
{
    const string sample = "Hello world [User002].";

    // Default (case-insensitive) search with the '*' and '?' wildcards.
    Assert.IsTrue(sample.IsPatternMatch("[user*]"));
    Assert.IsTrue(sample.IsPatternMatch("[user??2]"));

    // Case-sensitive search succeeds because the casing of the pattern equals the text.
    Assert.IsTrue(sample.IsPatternMatch("[User??2]", true));

    // The text contains brackets, not parentheses, so neither search mode finds a match.
    Assert.IsFalse(sample.IsPatternMatch("(user*)"));
    Assert.IsFalse(sample.IsPatternMatch("(user*)", true));
}
/// <summary>
/// Verifies that PatternSearch returns either the complete match or only the part
/// covered by the wildcard(s), for case-insensitive and case-sensitive searches.
/// </summary>
[TestMethod]
public void TestSearchPattern()
{
    const string text = "Hello world [USER002].";

    // Note: Assert.AreEqual takes the expected value first and the actual value second,
    // so that a failing assertion reports the two values the right way round.
    string msg1 = text.PatternSearch("[user*]");
    Assert.AreEqual("[USER002]", msg1);

    string msg2 = text.PatternSearch("[user*]", false, true); // Exclude pattern so result is the wildcard replacement
    Assert.AreEqual("002", msg2);

    string msg3 = text.PatternSearch("[user*]", true, false); // Case sensitive search so no result
    Assert.AreEqual("", msg3);

    string msg4 = text.PatternSearch("[user*]", true, true); // Case sensitive, so excluding the pattern makes no difference: still no result
    Assert.AreEqual("", msg4);

    string msg5 = text.PatternSearch("[user00?]", false, true); // Exclude pattern so result is the wildcard replacement
    Assert.AreEqual("2", msg5);

    string msg6 = text.PatternSearch("[user???]", false, true); // Same here, result is the wildcard replacement
    Assert.AreEqual("002", msg6);

    const string text2 = "Codes [USER002], [USER008]...";
    string msg7 = text2.PatternSearch("[user*]");
    Assert.AreEqual("[USER002], [USER008]", msg7); // Pity, I wanted the first match, but the '*' wildcard is greedy so the 'outer' match is returned
}
/// <summary>
/// Verifies that PatternReplace substitutes a matched wildcard pattern and leaves
/// the text untouched when the pattern is not found.
/// </summary>
[TestMethod]
public void TestReplacePattern()
{
    const string original = "Hello world [USER002].";
    const string replaced = "Hello world and Marino.";

    // '*' wildcard, case-insensitive by default.
    string starResult = original.PatternReplace("[user*]", "and Marino");
    Assert.AreEqual(replaced, starResult);

    // '?' wildcard, case-insensitive by default.
    string questionMarkResult = original.PatternReplace("[user00?]", "and Marino");
    Assert.AreEqual(replaced, questionMarkResult);

    // Text to replace won't be found, so no change.
    string noMatchResult = original.PatternReplace("Daffy Duck", "XXX");
    Assert.AreEqual("Hello world [USER002].", noMatchResult);

    // Case sensitive search without match, so no change.
    string caseSensitiveResult = original.PatternReplace("[user00?]", "and Marino", true);
    Assert.AreEqual("Hello world [USER002].", caseSensitiveResult);
}
Now that you’ve seen what I mean, it’s time for the implementation.
/// <summary>
/// Simple search for a pattern in the given text. The pattern can contain '*' and '?' signs for wildcard searches.
/// A '*' stands for one or more characters while a '?' stands for exactly one character.
/// The pattern may occur anywhere in the text; it does not have to cover the whole text.
/// </summary>
/// <param name="text">Text string</param>
/// <param name="pattern">Pattern to search for</param>
/// <param name="searchCaseSensitive">True if the search should be Case Sensitive (false is the default value)</param>
/// <returns>True if the pattern was found in the text</returns>
public static bool IsPatternMatch(this string text, string pattern, bool searchCaseSensitive = false)
{
    // Always compare culture-invariantly, exactly like PatternSearch and PatternReplace do.
    // Without this flag a case-insensitive match depends on the current culture of the thread.
    RegexOptions options = RegexOptions.CultureInvariant;
    if (!searchCaseSensitive)
    {
        options |= RegexOptions.IgnoreCase;
    }

    string regexPattern = ReplaceRegexChars(pattern);
    Regex regex = new Regex(regexPattern, options);
    return regex.IsMatch(text);
}
/// <summary>
/// Simple search for a pattern in the given text. The pattern can contain '*' and '?' signs for wildcard searches.
/// A '*' stands for one or more characters while a '?' stands for exactly one character.
/// Only the first match is considered. Because '*' is greedy, that match is the longest ('outer') one.
/// </summary>
/// <example>
/// "code [User002]".PatternSearch("[user???]") => "[User002]" - Pattern is recognized (case insensitive by default) and the match is returned
/// "code [User002]".PatternSearch("[user???]", true) => "" - Pattern is not recognized because the search is case sensitive
/// "code [User002]".PatternSearch("[user???]", false, true) => "002" - Pattern is recognized and only the wildcard result is returned
/// "code [User002]".PatternSearch("[User???]", true, true) => "002" - Same as above, but now the case sensitive search is successful as well
/// "code [User002]".PatternSearch("[user*]") => "[User002]" - The * wildcard can be used as well
/// "codes [User002], [User008]".PatternSearch("[user*]") => "[User002], [User008]" - It's a SIMPLE search so (unfortunately) the first OUTER result is returned
/// </example>
/// <param name="text">The given text to search in</param>
/// <param name="pattern">A text pattern to look for, possibly with wildcards ("*" or "?")</param>
/// <param name="searchCaseSensitive">True if the search should be Case Sensitive (false is the default value)</param>
/// <param name="excludePatternInResult">True if only the text covered by the first run of wildcards should be returned (false is the default value)</param>
/// <returns>
/// The matched text, or only the first wildcard part of it when <paramref name="excludePatternInResult"/> is true.
/// An empty string when there is no match, or when the wildcard part is requested but the pattern has no wildcards.
/// </returns>
public static string PatternSearch(this string text, string pattern, bool searchCaseSensitive = false, bool excludePatternInResult = false)
{
    RegexOptions options = RegexOptions.CultureInvariant;
    if (!searchCaseSensitive)
    {
        options |= RegexOptions.IgnoreCase;
    }

    Match found = new Regex(ReplaceRegexChars(pattern), options).Match(text);
    if (!found.Success)
    {
        return String.Empty;
    }

    if (!excludePatternInResult)
    {
        return found.Value;
    }

    // Group 0 is the whole match; group 1 (if present) is the first run of wildcards.
    return found.Groups.Count > 1 ? found.Groups[1].Value : String.Empty;
}
/// <summary>
/// Simple replace for a pattern in the given text. The pattern can contain '*' and '?' signs for wildcard searches.
/// A '*' stands for one or more characters while a '?' stands for exactly one character.
/// Every occurrence of the pattern is replaced. The replacement is inserted literally:
/// character sequences such as "$1" or "$$" have no special meaning.
/// </summary>
/// <example>
/// "Hello world [USER002].".PatternReplace("[user*]", "and Marino") => "Hello world and Marino." - Pattern is recognized and replaced
/// "Hello world [USER002].".PatternReplace("[user00?]", "and Marino") => "Hello world and Marino." - Pattern is recognized and replaced
/// "Hello world [USER002].".PatternReplace("Daffy Duck", "XXX") => "Hello world [USER002]." - Pattern is NOT recognized so no change
/// "Hello world [USER002].".PatternReplace("[user00?]", "and Marino", true) => "Hello world [USER002]." - Case sensitive search so pattern is NOT recognized thus no change
/// </example>
/// <param name="text">The given text to search in</param>
/// <param name="pattern">A text pattern to look for, possibly with wildcards ("*" or "?")</param>
/// <param name="replacement">A string used for replacing the pattern if a match is found</param>
/// <param name="searchCaseSensitive">True if the search should be Case Sensitive (false is the default value)</param>
/// <returns>The given text with a possible found pattern replaced by another text</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="replacement"/> is null</exception>
public static string PatternReplace(this string text, string pattern, string replacement, bool searchCaseSensitive = false)
{
    string regexPattern = ReplaceRegexChars(pattern);

    // Keep rejecting a null replacement with the same exception type as before.
    if (replacement == null)
    {
        throw new ArgumentNullException("replacement");
    }

    RegexOptions options = RegexOptions.CultureInvariant;
    if (!searchCaseSensitive)
    {
        options |= RegexOptions.IgnoreCase;
    }

    // Use a match evaluator instead of a replacement string: a replacement string is parsed as a
    // regex substitution pattern, so "$1", "$&" or "$$" in the caller's text would be expanded
    // instead of being copied to the result as-is.
    return Regex.Replace(text, regexPattern, match => replacement, options);
}
/// <summary>
/// A rather dirty method to replace a user pattern with a Regex-pattern. For the 'normal' cases it will work,
/// but I can imagine a more exotic string pattern will cause unexpected results.
/// <para>
/// Translation rules: '*' becomes ".+" and '?' becomes "."; each run of consecutive wildcards is wrapped
/// in one capturing group. A space becomes "\s". Characters with a special meaning in a regular
/// expression are escaped with a backslash. All other characters are copied unchanged.
/// </para>
/// </summary>
/// <param name="text">A given pattern string</param>
/// <returns>The text rewritten as RegEx pattern</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="text"/> is null</exception>
private static string ReplaceRegexChars(this IEnumerable<char> text)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    // Characters that must be escaped to be taken literally ('*' and '?' are handled as wildcards).
    const string regexChars = @"+.^$|\{}[]()-";

    // Build the result in a StringBuilder; repeated string concatenation copies the whole
    // intermediate result for every character of the pattern.
    System.Text.StringBuilder regexPattern = new System.Text.StringBuilder();
    bool isInWildcardChar = false;

    foreach (char c in text)
    {
        if (c == '*' || c == '?')
        {
            if (!isInWildcardChar)
            {
                regexPattern.Append('('); // Open the wildcard search
                isInWildcardChar = true;
            }
            regexPattern.Append(c == '*' ? ".+" : ".");
            continue;
        }

        if (isInWildcardChar)
        {
            regexPattern.Append(')'); // Close the wildcard search
            isInWildcardChar = false;
        }

        if (c == ' ')
        {
            regexPattern.Append(@"\s");
            continue;
        }

        if (regexChars.IndexOf(c) >= 0)
        {
            regexPattern.Append('\\');
        }
        regexPattern.Append(c);
    }

    if (isInWildcardChar)
    {
        regexPattern.Append(')'); // Close the wildcard search
    }

    return regexPattern.ToString();
}
By the way, for a year or so I have had my own reference project in which I save all my hand-written, reusable code. I should have started this much earlier because it’s an enormous time-saver! The only overhead you have is copying and pasting reusable methods from your working solution to your reference solution. My previous idea of keeping complete customer solutions as a reference didn’t work out that well. In the time needed to find some implementation (if it was found at all) I could have written it again, and even better. :-)
Read Full Post »
You must be logged in to post a comment.