Source Code That Uses the java.util.regex API to Search a Text for Regular Expression Matches

This Java program takes a regular expression from the user and searches a text file for matches. It prints every matched word along with its start and end index, and then the total number of matches found for the regular expression. The program uses the java.util.regex API. Check it out:

import java.io.BufferedReader; //used for opening text file
import java.io.FileReader; //used for reading text file
import java.io.IOException; //error handling
import java.util.regex.*; //regex api, used to return regular expression matches
import javax.swing.JOptionPane; //allows user to input the regular expression

/**
 * Small demo of the java.util.regex API.
 *
 * <p>Reads a corpus text file, asks the user for a regular expression through a
 * Swing input dialog, and prints every non-empty match together with its start
 * and end index, followed by the total number of matches.
 */
public class Regex {

    /** Corpus location; the program assumes the file sits in the root of the C: drive. */
    private static final String CORPUS_PATH = "C:\\corpus.txt";

    private static final String BANNER_RULE = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~";
    private static final String MATCH_SEPARATOR = "-----------------------------------------------";
    private static final String SUMMARY_RULE = "********************************";

    /**
     * Program entry point: prints the banner, loads the corpus, prompts for a
     * regular expression and reports the matches.
     *
     * @param args ignored
     * @throws IOException if the corpus file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        System.out.println(BANNER_RULE);
        System.out.println("~ Regex V1 by Mohammad Darwich ~");
        System.out.println(BANNER_RULE);

        // The search is case-sensitive and runs on the corpus exactly as read from disk.
        String corpus = readFile(CORPUS_PATH);

        String regexInput = JOptionPane.showInputDialog("Enter your Regular Expression"
                + "\nExample: [A-Za-z0-9_]*[abc]+");

        // showInputDialog returns null when the user cancels or closes the dialog.
        if (regexInput == null) {
            System.err.println("No regular expression entered; nothing to search for.");
            return;
        }

        try {
            regexChecker(regexInput, corpus);
        } catch (PatternSyntaxException e) {
            // The expression comes straight from the user, so report bad syntax
            // instead of terminating with a stack trace.
            System.err.println("Invalid regular expression: " + e.getMessage());
        }
    }

    /**
     * Searches {@code corpus} for every match of {@code regexString} and prints
     * each non-empty match (trimmed), its start index and its end index, then the
     * total number of non-empty matches.
     *
     * @param regexString the user-defined regular expression
     * @param corpus      the text to search
     * @throws PatternSyntaxException if {@code regexString} is not a valid pattern
     */
    public static void regexChecker(String regexString, String corpus) {
        Pattern pattern = Pattern.compile(regexString);
        Matcher matcher = pattern.matcher(corpus);
        int matchCount = 0;

        while (matcher.find()) {
            String match = matcher.group();

            // Patterns that can match the empty string (e.g. "a*") yield zero-length
            // matches; those are neither printed nor counted.
            if (match.isEmpty()) {
                continue;
            }

            System.out.println(MATCH_SEPARATOR);
            System.out.println(match.trim());
            System.out.println("Start index: " + matcher.start());
            // Matcher.end() is the offset just past the last matched character.
            System.out.println("End index: " + matcher.end());

            matchCount++;
        }

        System.out.println(SUMMARY_RULE);
        System.out.println("Word Frequency --> " + matchCount);
        System.out.println(SUMMARY_RULE);
    }

    /**
     * Reads a whole text file into a single string. Every line read, including the
     * last one, is followed by the platform line separator.
     *
     * @param file path of the text file to read
     * @return the file content with platform line separators
     * @throws IOException if the file cannot be opened or read
     */
    public static String readFile(String file) throws IOException {
        String lineSeparator = System.getProperty("line.separator");
        StringBuilder content = new StringBuilder();

        // try-with-resources closes the reader even when reading fails part-way.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
                content.append(lineSeparator);
            }
        }

        return content.toString();
    }

} //end of class Regex