我发现这个想法很有趣,所以我做了一个二分搜索的概念验证。
这个程序测试不够充分,可能会有一些错误,但目前看起来还是能够工作的,并且演示了分治的思想。你在文件的中间检查,根据是否太高或太低,将数据分成两部分并搜索相关的一半。你递归地重复执行这个过程,直到你接近目标。
#include <ctime>
#include <cmath>
#include <cstdlib>
#include <string>
#include <fstream>
#include <iostream>
int global_counter;
std::streampos find_stamp(std::istream& is, long stamp, std::streampos pos, std::streampos end)
{
++global_counter;
if(pos == 0)
return 0;
std::string s;
long found_stamp;
is.seekg(pos);
if(!(std::getline(std::getline(is, s, ','), s, '"') >> found_stamp))
return end;
if(found_stamp > stamp)
return find_stamp(is, stamp, pos / 2, pos);
if(stamp - found_stamp > 10)
return find_stamp(is, stamp, (pos + end) / 2, end);
pos = is.tellg();
while(std::getline(std::getline(is, s, ','), s, '"') >> found_stamp)
{
if(found_stamp > stamp)
return pos;
pos = is.tellg();
}
return end;
}
void print_after(const std::string& filename, long stamp)
{
std::ifstream ifs(filename, std::ios::ate);
std::streampos end = ifs.tellg();
auto pos = end / 2;
if((pos = find_stamp(ifs, stamp, pos, end)) != end)
{
ifs.seekg(pos);
std::string line;
std::getline(ifs, line, ',');
while(std::getline(ifs, line, ','))
std::cout << line;
}
}
inline
std::string leading_zeros(int n, int zeros = 2)
{
std::string s;
for(int z = std::pow(10, zeros - 1); z; z /= 10)
s += (n < z ? "0":"");
return s + std::to_string(n);
}
int main()
{
std::srand(std::time(0));
std::ofstream ofs("test.txt");
for(int i = 0; i < 1000; ++i)
{
ofs << '"' << leading_zeros(i, 10) << '"';
ofs << ":{\"AA\":" << (std::rand() % 100);
ofs << '.' << (std::rand() % 100) << "},\n";
}
ofs.close();
global_counter = 0;
print_after("test.txt", 993);
std::cout << "find checked " << global_counter << " places in the file\n";
}
输出:
"0000000994":{"AA":80.6}
"0000000995":{"AA":11.90}
"0000000996":{"AA":16.43}
"0000000997":{"AA":53.11}
"0000000998":{"AA":68.43}
"0000000999":{"AA":79.77}
find checked 6 places in the file