Java:如何统计某个单词在文件中出现的次数?

3

这个程序需要在文件中查找某个单词并显示其在文件中出现的频率。但是它目前无法正确工作,匹配结果不准确。

import java.util.concurrent.*;
import java.util.*;
import java.io.*;

class FolderScan implements Runnable {

    private String path;
    private BlockingQueue<File> queue;
    private CountDownLatch latch;
    private File endOfWorkFile;

    FolderScan(String path, BlockingQueue<File> queue, CountDownLatch latch,
            File endOfWorkFile) {
        this.path = path;
        this.queue = queue;
        this.latch = latch;
        this.endOfWorkFile = endOfWorkFile;
    }

    public FolderScan() {
    }

    @Override
    public void run() {
        findFiles(path);
        queue.add(endOfWorkFile);
        latch.countDown();
    }

    private void findFiles(String path) {

        try {
            File root = new File(path);
            File[] list = root.listFiles();
            for (File currentFile : list) {
                if (currentFile.isDirectory()) {
                    findFiles(currentFile.getAbsolutePath());
                } else {
                    if (currentFile.getName().toLowerCase().endsWith((".txt"))) {
                        queue.put(currentFile);
                    }
                }
            }
        } catch (InterruptedException e) {
            e.printStackTrace();
        }

    }

}

/**
 * Consumer task: takes files from a shared queue until it receives the
 * sentinel file, scans each one for the word {@code whatFind} and prints a
 * line per file. Also hosts the console entry point of the program.
 */
public class FileScan implements Runnable {

    private String whatFind;
    private BlockingQueue<File> queue;
    private CountDownLatch latch;
    private File endOfWorkFile;

    /**
     * @param whatFind      word to look for
     * @param queue         queue the files are taken from
     * @param latch         latch counted down once when this task finishes
     * @param endOfWorkFile sentinel instance that marks the end of the work
     */
    public FileScan(String whatFind, BlockingQueue<File> queue,
            CountDownLatch latch, File endOfWorkFile) {
        this.whatFind = whatFind;
        this.queue = queue;
        this.latch = latch;
        this.endOfWorkFile = endOfWorkFile;
    }

    // No-arg constructor; main() uses it only to call askUserPathAndWord().
    public FileScan() {
    }

    // Instance-level set of every token read so far. It is never cleared, so
    // words from files scanned earlier are still present when later files are
    // checked, and a set keeps each distinct word only once.
    Set<String> words = new HashSet<String>();

    /**
     * Takes files from the queue and scans them until the sentinel arrives,
     * then counts the latch down.
     */
    @Override
    public void run() {

        while (true) {
            try {
                File file;
                file = queue.take();

                // Reference comparison: only the sentinel instance itself ends the loop.
                if (file == endOfWorkFile) {
                    break;
                }

                scan(file);
            } catch (InterruptedException e) {
                // The interrupt is only printed; the loop then goes on taking files.
                e.printStackTrace();
            }
        }

        latch.countDown();
    }

    /**
     * Reads all whitespace-separated tokens of {@code file} into {@code words}
     * and prints the file path with a match figure.
     *
     * NOTE: the figure is not an occurrence count. It is incremented at most
     * once per call, so it is always 0 or 1, and because {@code words} also
     * holds tokens from previously scanned files it can be 1 for a file that
     * does not contain the word at all.
     *
     * @param file file to read
     */
    private void scan(File file) {
        Scanner scanner = null;
        int matches = 0;

        try {
            scanner = new Scanner(file);
        } catch (FileNotFoundException e) {
            System.out.println("File Not Found.");
            e.printStackTrace();
        }

        // NOTE: if the file was not found, scanner is still null here and the
        // next line throws NullPointerException. The scanner is never closed.
        while (scanner.hasNext()) {
            String word = scanner.next();
            words.add(word);
        }

        // Single membership test after the loop: adds at most 1 to matches.
        if (words.contains(this.whatFind)) {
            matches++;
        }

        String myStr = String.format("File: %s - and the number of matches "
                + "is: %d", file.getAbsolutePath(), matches);
        System.out.println(myStr);
    }

    /**
     * Asks the user on standard input for a directory path and a word, starts
     * one FolderScan and one FileScan task on a cached thread pool and waits
     * on the latch until both have counted down.
     */
    public void askUserPathAndWord() {

        BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(System.in));
        String path;
        String whatFind;
        BlockingQueue<File> queue = new LinkedBlockingQueue<File>();

        try {
            System.out.println("Please, enter a Path and Word"
                    + "(which you want to find):");
            System.out.println("Please enter a Path:");
            path = bufferedReader.readLine();
            System.out.println("Please enter a Word:");
            whatFind = bufferedReader.readLine();

            // readLine() returns null at end of input.
            if (path != null && whatFind != null) {

                // Sentinel object passed through the queue; it is compared by
                // reference in run(), the file itself is never opened here.
                File endOfWorkFile = new File("GameOver.tmp");
                // Two tasks (producer and consumer) each count down once.
                CountDownLatch latch = new CountDownLatch(2);

                FolderScan folderScan = new FolderScan(path, queue, latch,
                        endOfWorkFile);
                FileScan fileScan = new FileScan(whatFind, queue, latch,
                        endOfWorkFile);

                // NOTE: the executor is never shut down in this method.
                Executor executor = Executors.newCachedThreadPool();
                executor.execute(folderScan);
                executor.execute(fileScan);

                latch.await();
                System.out.println("Thank you!");
            } else {
                System.out.println("You did not enter anything");
            }

        } catch (IOException | RuntimeException e) {
            System.out.println("Wrong input!");
            e.printStackTrace();
        } catch (InterruptedException e) {
            System.out.println("Interrupted.");
            e.printStackTrace();
        }
    }

    /**
     * Program entry point: runs the interactive search and prints the elapsed
     * wall-clock time in milliseconds.
     *
     * @param args command-line arguments (not used)
     */

    public static void main(String[] args) {
        long startTime = System.currentTimeMillis();

        new FileScan().askUserPathAndWord();

        long stopTime = System.currentTimeMillis();
        long elapsedTime = stopTime - startTime;
        System.out.println("\nRuntime time " + elapsedTime + " milliseconds.");
    }
}

问题:

  • 如何解决这个问题并正确地组织这个文件中的查找结果?
  • 也许最好使用另一种逻辑?
3个回答

3

这似乎是一个问题:

// Every token is added to the set of words; nothing is counted in the loop.
while (scanner.hasNext()) {
    String word = scanner.next();
    words.add(word);
}

// Evaluated once after the loop, so matches grows by at most 1.
if (words.contains(this.whatFind)) {
    matches++;
}

这只检查了单词是否存在,而没有统计它在文本中出现的次数。

改为:

// Compare each token as it is read, so every occurrence increments matches.
while (scanner.hasNext()) {
    String word = scanner.next();
    if (word.equals(whatFind))
        matches++;
}

更简单地说:
// Same per-token comparison without the temporary variable.
while (scanner.hasNext())
    if (scanner.next().equals(whatFind))
        matches++;

这里的 if (currentFile.getName().toLowerCase().endsWith(".txt")) queue.put(currentFile); 该如何改成按 MIME 类型来检查?因为我不知道该怎么做。 - user2101776
@NazarRoskolnikov endsWith(".mime")?不确定它是否仅限于该扩展名。如果存在非纯文本版本的此扩展名,则需要特定的解析器,这可能会相当复杂。 - Bernhard Barker
endsWith(".mime") 这种写法不起作用。我用了你那个更简单的写法 - 工作正常。我们如何做到只在文件中包含 whatFind 时才打印? - user2101776
1
if (matches > 0) { String myStr = ... } @NazarRoskolnikov - Bernhard Barker

1
根据一些建议(非常感谢Dukeling!):
package task;

import java.util.concurrent.*;
import java.util.*;
import java.io.*;

class FolderScan implements Runnable {

    private String path;
    private BlockingQueue<File> queue;
    private CountDownLatch latch;
    private File endOfWorkFile;

    FolderScan(String path, BlockingQueue<File> queue, CountDownLatch latch,
            File endOfWorkFile) {
        this.path = path;
        this.queue = queue;
        this.latch = latch;
        this.endOfWorkFile = endOfWorkFile;
    }

    public FolderScan() {
    }

    @Override
    public void run() {
        findFiles(path);
        queue.add(endOfWorkFile);
        latch.countDown();
    }

    private void findFiles(String path) {

        try {
            File root = new File(path);
            File[] list = root.listFiles();
            for (File currentFile : list) {
                if (currentFile.isDirectory()) {
                    findFiles(currentFile.getAbsolutePath());
                } else {
                    if (currentFile.getName().toLowerCase().endsWith((".txt"))) {
                        queue.put(currentFile);
                    }
                }
            }
        } catch (InterruptedException e) {
            e.printStackTrace();
        }

    }

}

/**
 * Consumer task: takes files from a shared queue until it receives the
 * sentinel file, counts how many whitespace-separated tokens of each file
 * equal {@code whatFind}, and prints one line for every file with at least one
 * match. Also hosts the console entry point of the program.
 */
public class FileScan implements Runnable {

    private String whatFind;
    private BlockingQueue<File> queue;
    private CountDownLatch latch;
    private File endOfWorkFile;

    /**
     * @param whatFind      word to look for (exact, case-sensitive token match)
     * @param queue         queue the files are taken from
     * @param latch         latch counted down once when this task finishes
     * @param endOfWorkFile sentinel instance that marks the end of the work
     */
    public FileScan(String whatFind, BlockingQueue<File> queue,
            CountDownLatch latch, File endOfWorkFile) {
        this.whatFind = whatFind;
        this.queue = queue;
        this.latch = latch;
        this.endOfWorkFile = endOfWorkFile;
    }

    /** Creates an instance used only to call {@link #askUserPathAndWord()}. */
    public FileScan() {
    }

    /**
     * Scans queued files until the sentinel arrives or the thread is
     * interrupted. The latch is released in every case so that the thread
     * waiting on it cannot hang if a scan fails.
     */
    @Override
    public void run() {
        try {
            while (true) {
                File file = queue.take();

                // Identity comparison is intentional: only the sentinel
                // instance itself, not an equal path, ends the loop.
                if (file == endOfWorkFile) {
                    break;
                }

                scan(file);
            }
        } catch (InterruptedException e) {
            // Stop consuming and keep the interrupt visible to the pool.
            Thread.currentThread().interrupt();
        } finally {
            latch.countDown();
        }
    }

    /**
     * Counts the tokens of {@code file} that equal {@code whatFind} and prints
     * the result when the count is positive.
     *
     * @param file file to read; if it cannot be opened the problem is reported
     *             and the file is skipped
     */
    private void scan(File file) {
        int matches = 0;

        // try-with-resources closes the underlying file handle on every path.
        try (Scanner scanner = new Scanner(file)) {
            while (scanner.hasNext()) {
                if (scanner.next().equals(whatFind)) {
                    matches++;
                }
            }
        } catch (FileNotFoundException e) {
            System.out.println("File Not Found.");
            e.printStackTrace();
            // Nothing to count; skip this file instead of dereferencing a null scanner.
            return;
        }

        if (matches > 0) {
            String myStr = String.format(
                    "File: %s - and the number of matches is: %d",
                    file.getAbsolutePath(), matches);
            System.out.println(myStr);
        }
    }

    /**
     * Asks the user on standard input for a directory path and a word, runs
     * one FolderScan producer and one FileScan consumer on a thread pool and
     * waits until both have finished.
     */
    public void askUserPathAndWord() {

        BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(System.in));
        String path;
        String whatFind;
        BlockingQueue<File> queue = new LinkedBlockingQueue<File>();

        try {
            System.out.println("Please, enter a Path and Word"
                    + "(which you want to find):");
            System.out.println("Please enter a Path:");
            path = bufferedReader.readLine();
            System.out.println("Please enter a Word:");
            whatFind = bufferedReader.readLine();

            // readLine() returns null at end of input.
            if (path != null && whatFind != null) {

                // Sentinel passed through the queue; compared by reference in run().
                File endOfWorkFile = new File("GameOver.tmp");
                // Producer and consumer each count down once.
                CountDownLatch latch = new CountDownLatch(2);

                FolderScan folderScan = new FolderScan(path, queue, latch,
                        endOfWorkFile);
                FileScan fileScan = new FileScan(whatFind, queue, latch,
                        endOfWorkFile);

                ExecutorService executor = Executors.newCachedThreadPool();
                try {
                    executor.execute(folderScan);
                    executor.execute(fileScan);

                    latch.await();
                } finally {
                    // Without shutdown the idle non-daemon pool threads keep
                    // the JVM alive after main() has returned.
                    executor.shutdown();
                }
                System.out.println("Thank you!");
            } else {
                System.out.println("You did not enter anything");
            }

        } catch (IOException | RuntimeException e) {
            System.out.println("Wrong input!");
            e.printStackTrace();
        } catch (InterruptedException e) {
            System.out.println("Interrupted.");
            e.printStackTrace();
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Program entry point: runs the interactive search and prints the elapsed
     * wall-clock time in milliseconds.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        long startTime = System.currentTimeMillis();

        new FileScan().askUserPathAndWord();

        long stopTime = System.currentTimeMillis();
        long elapsedTime = stopTime - startTime;
        System.out.println("\nRuntime time " + elapsedTime + " milliseconds.");
    }
}

-1
 // A HashSet keeps a single copy of each distinct word; repeats are not recorded.
 Set<String> words = new HashSet<String>();

换用一种 List 实现作为数据结构。
将下面的 if 语句放到 while 循环内部。
 // Adds 1 to matches each time this check runs while the word is present in words.
 if (words.contains(this.whatFind)) {
        matches++;
    }

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接