82 lines
1.9 KiB
C++
82 lines
1.9 KiB
C++
|
#include <algorithm>
|
||
|
#include <filesystem>
|
||
|
#include <iostream>
|
||
|
#include <map>
|
||
|
#include <sstream>
|
||
|
#include <string>
|
||
|
#include <string_view>
|
||
|
#include <vector>
|
||
|
#include <set>
|
||
|
|
||
|
#include "utils.h"
|
||
|
|
||
|
namespace fs = std::filesystem;
|
||
|
|
||
|
using WordVector = std::vector<std::string>;
|
||
|
using WordCountPair = std::pair<std::string, std::size_t>;
|
||
|
using CountedWordsMap = std::map<std::string, std::size_t>;
|
||
|
|
||
|
WordVector split_text(std::string_view text, const std::set<char> &dels);
|
||
|
CountedWordsMap count_words(const WordVector &words);
|
||
|
|
||
|
int main()
|
||
|
{
|
||
|
auto current_path = fs::current_path();
|
||
|
current_path /= "text.txt";
|
||
|
|
||
|
/* get text */
|
||
|
auto text = readFile(current_path.string());
|
||
|
std::cout << "---------- text\n";
|
||
|
std::cout << text;
|
||
|
|
||
|
/* make text lowercase */
|
||
|
std::transform(text.begin(), text.end(),
|
||
|
text.begin(), ::tolower);
|
||
|
|
||
|
/* split text */
|
||
|
std::cout << "\n---------- splitted text\n";
|
||
|
auto splitted_text = split_text(text, {' ', '\n', '.'});
|
||
|
print_vector(splitted_text);
|
||
|
|
||
|
/* count word occurences */
|
||
|
std::cout << "\n---------- word count\n";
|
||
|
auto counted_words = count_words(splitted_text);
|
||
|
print_map(counted_words);
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/*
 * Count how many times each word occurs.
 *
 * words: the words to tally; duplicates are expected.
 *
 * Returns a map from each distinct word to its number of occurrences.
 * An empty input yields an empty map.
 */
CountedWordsMap count_words(const WordVector &words)
{
    CountedWordsMap occurrences;
    /* iterate by const reference so no word is copied just to be looked up */
    for (const auto &word : words) {
        /* operator[] value-initialises a missing entry to 0 before the increment */
        ++occurrences[word];
    }
    return occurrences;
}
|
||
|
|
||
|
|
||
|
/* assumed average word length, used only to size the initial reservation */
#define AVG_W_LEN 4

/*
 * Split text into words.
 *
 * text: the text to split; it is only viewed, never modified.
 * dels: the set of delimiter characters. Every character found in this set
 *       ends the current word and is itself discarded.
 *
 * Returns the non-empty runs of non-delimiter characters, in order of
 * appearance. Consecutive delimiters produce no empty words, and a word
 * that reaches the end of the text without a closing delimiter is
 * included as well.
 */
WordVector split_text(std::string_view text, const std::set<char> &dels)
{
    auto result = WordVector{};
    /* pre allocate space */
    result.reserve(text.length() / AVG_W_LEN);

    std::size_t word_start = 0;
    for (std::size_t pos = 0; pos < text.size(); ++pos) {
        if (dels.find(text[pos]) == dels.end())
            continue;
        /* delimiter hit: emit the pending word, if there is one */
        if (pos > word_start)
            result.emplace_back(text.substr(word_start, pos - word_start));
        word_start = pos + 1;
    }

    /* flush the last word when the text does not end with a delimiter */
    if (word_start < text.size())
        result.emplace_back(text.substr(word_start));

    result.shrink_to_fit();
    return result;
}
|
||
|
|
||
|
|