有人可以帮我编写代码吗?如何在文本文件中搜索任何单词并计算它们重复了多少次?
例如test.txt:
hi
hola
hey
hi
bye
hoola
hi
如果我想知道test.txt文本中单词"Hi"出现了多少次,程序应该显示"重复3次"
希望你明白我的意思,谢谢回答。
public int countWord(String word, File file) {
int count = 0;
Scanner scanner = new Scanner(file);
while (scanner.hasNextLine()) {
String nextToken = scanner.next();
if (nextToken.equalsIgnoreCase(word))
count++;
}
return count;
}
HashMap h=new HashMap();
FileInputStream fin=new FileInputStream("d:\\file.txt");
BufferedReader br=new BufferedReader(new InputStreamReader(fin));
String n;
while((n=br.readLine())!=null)
{
if(h.containsKey(n))
{
int i=(Integer)h.get(n);
h.put(n,(i+1));
}
else
h.put(n, 1);
}
Apache Commons - StringUtils.countMatches()
使用Google Guava库中的MultiSet
集合。
Multiset<String> wordsMultiset = HashMultiset.create();
Scanner scanner = new Scanner(fileName);
while (scanner.hasNextLine()) {
wordsMultiset.add(scanner.nextLine());
}
for(Multiset.Entry<String> entry : wordsMultiset ){
System.out.println("Word : "+entry.getElement()+" count -> "+entry.getCount());
}
public class Wordcount
{
public static void main(String[] args)
{
int count=0;
String str="hi this is is is line";
String []s1=str.split(" ");
for(int i=0;i<=s1.length-1;i++)
{
if(s1[i].equals("is"))
{
count++;
}
}
System.out.println(count);
}
}
package File1;
import java.io.BufferedReader;
import java.io.FileReader;
public class CountLineWordsDuplicateWords {
public static void main(String[] args) {
FileReader fr = null;
BufferedReader br =null;
String [] stringArray;
int counLine = 0;
int arrayLength ;
String s="";
String stringLine="";
try{
fr = new FileReader("F:/Line.txt");
br = new BufferedReader(fr);
while((s = br.readLine()) != null){
stringLine = stringLine + s;
stringLine = stringLine + " ";/*Add space*/
counLine ++;
}
System.out.println(stringLine);
stringArray = stringLine.split(" ");
arrayLength = stringArray.length;
System.out.println("The number of Words is "+arrayLength);
/*Duplicate String count code */
for (int i = 0; i < arrayLength; i++) {
int c = 1 ;
for (int j = i+1; j < arrayLength; j++) {
if(stringArray[i].equalsIgnoreCase(stringArray[j])){
c++;
for (int j2 = j; j2 < arrayLength; j2++) {
stringArray[j2] = stringArray[j2+1];
arrayLength = arrayLength - 1;
}
}//End of If block
}//End of Inner for block
System.out.println("The "+stringArray[i]+" present "+c+" times .");
}//End of Outer for block
System.out.println("The number of Line is "+counLine);
System.out.println();
fr.close();
br.close();
}catch (Exception e) {
e.printStackTrace();
}
}//End of main() method
}//End of class CountLineWordsDuplicateWords
package somePackage;
public static void main(String[] args) {
String path = ""; //ADD YOUR PATH HERE
String fileName = "test2.txt";
String testWord = "Macbeth"; //CHANGE THIS IF YOU WANT
int tLen = testWord.length();
int wordCntr = 0;
String file = path + fileName;
boolean check;
try{
FileInputStream fstream = new FileInputStream(file);
BufferedReader br = new BufferedReader(new InputStreamReader(fstream));
String strLine;
//Read File Line By Line
while((strLine = br.readLine()) != null){
//check to see whether testWord occurs at least once in the line of text
check = strLine.toLowerCase().contains(testWord.toLowerCase());
if(check){
//get the line, and parse its words into a String array
String[] lineWords = strLine.split("\\s+");
for(String w : lineWords){
//first see if the word is as least as long as the testWord
if(w.length() >= tLen){
/*
1) grab the specific word, minus whitespace
2) check to see whether the first part of it having same length
as testWord is equivalent to testWord, ignoring case
*/
String word = w.substring(0,tLen).trim();
if(word.equalsIgnoreCase(testWord)){
wordCntr++;
}
}
}
}
}
System.out.println("total is: " + wordCntr);
//Close the input stream
br.close();
} catch(Exception e){
e.printStackTrace();
}
}
您可以逐行读取文本文件。我假设每一行可能包含多个单词。对于每一行,您可以调用:
String[] words = line.split(" ");
for(int i=0; i<words.length; i++){
if(words[i].equalsIgnoreCase(searhedWord))
count++;
}
public int countWords(String w, String fileName) {
int count = 0;
Scanner scanner = new Scanner(inputFile);
scanner.useDelimiter("[^a-zA-Z]"); // non alphabets act as delimeters
String word = scanner.next();
if (word.equalsIgnoreCase(w))
count++;
return count;
}
尝试使用 Pattern
和 Matcher
这种方式。
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Dem {
public static void main(String[] args){
try {
File f = new File("d://My.txt");
FileReader fr = new FileReader(f);
BufferedReader br = new BufferedReader(fr);
String s = new String();
while((s=br.readLine())!=null){
s = s + s;
}
int count = 0;
Pattern pat = Pattern.compile("it*");
Matcher mat = pat.matcher(s);
while(mat.find()){
if(mat.find()){
mat.start();
count++;
}
}
System.out.println(count);
} catch (Exception e) {
e.printStackTrace();
}
}
}