From 3145a10b340ead99e433022c6033aa4e4e427060 Mon Sep 17 00:00:00 2001 From: Maggan Date: Mon, 18 May 2020 17:19:36 +0300 Subject: [PATCH 1/4] Create freq03.py --- src/freq03.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 src/freq03.py diff --git a/src/freq03.py b/src/freq03.py new file mode 100644 index 0000000..555adde --- /dev/null +++ b/src/freq03.py @@ -0,0 +1,20 @@ +import re +import time +from collections import defaultdict + +start_time = time.time() + +words = defaultdict(int) +pattern = re.compile(r'[a-zA-Z]+') + +result = open('result.txt', 'w', encoding='utf-8', errors='ignore') +with open('pg.txt', 'r', encoding='utf-8', errors='ignore') as f: + for line in f: + str = pattern.findall(line) + for w in str: + words[w.lower()] += 1 + +for w in sorted(words, key=words.get, reverse=True): + print(w, words[w], file=result) + +print("--- %s seconds ---" % (time.time() - start_time)) From bc3fe3d4a769ec7d292396efc3f2db7d4c4c1d83 Mon Sep 17 00:00:00 2001 From: Maggan Date: Wed, 20 May 2020 23:21:23 +0300 Subject: [PATCH 2/4] Create antifreq.cpp --- src/antifreq.cpp | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 src/antifreq.cpp diff --git a/src/antifreq.cpp b/src/antifreq.cpp new file mode 100644 index 0000000..423bdf5 --- /dev/null +++ b/src/antifreq.cpp @@ -0,0 +1,83 @@ +#include +#include +#include +#include +#include + +using namespace std; + +#pragma warning (disable : 4996) + +bool SortByVal(const pair &a, const pair &b) +{ + if (a.second == b.second) + { + return a.first < b.first; + } + return a.second > b.second; +} + +int main(int argc, char *argv[]) +{ + if (argc != 3) + { + cout << "Example: " << argv[0] << " in.txt out.txt" << endl; + return 1; + } + + unordered_map dict; + vector> sortedList; + + ifstream fIn(argv[1]); + + const std::regex re("[a-z]+", std::regex_constants::icase); + std::regex::optimize; + std::locale loc; + std::smatch match; + + string line, word; + + while (std::getline(fIn, line)) + { + while (std::regex_search(line, match, re)) + { + word = match[0].str(); + + for (auto &chr : word) + { + chr = std::tolower(chr, loc); + } + + if (dict[word]) + { + dict[word]++; + } + else + { + dict[word] = 1; + } + + line = match.suffix().str(); + } + } + + for (unordered_map ::iterator it = dict.begin(); it != dict.end(); it++) + { + sortedList.push_back(make_pair(it->first, it->second)); + } + + sort(sortedList.begin(), sortedList.end(), SortByVal); + + ofstream fOut; + fOut.open(argv[2]); + + for (auto &elem : sortedList) + { + fOut << elem.first << " " << elem.second << endl; + } + + fIn.close(); + fOut.close(); + + return 0; +} \ No newline at end of file From e70bd529d823501281a95f7911d4048e4f8d53d3 Mon Sep 17 00:00:00 2001 From: Maggan Date: Thu, 21 May 2020 01:06:01 +0300 Subject: [PATCH 3/4] arguments added --- src/antifreq.cpp | 4 ++-- src/freq03.py | 17 ++++++++++------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/antifreq.cpp b/src/antifreq.cpp index 423bdf5..eb47f80 100644 --- a/src/antifreq.cpp +++ b/src/antifreq.cpp @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) if (dict[word]) { - dict[word]++; + ++dict[word]; } else { @@ -61,7 +61,7 @@ int main(int argc, char *argv[]) } } - for (unordered_map ::iterator it = dict.begin(); it != dict.end(); it++) + for (unordered_map ::iterator it = dict.begin(); it != dict.end(); ++it) { sortedList.push_back(make_pair(it->first, it->second)); } diff --git a/src/freq03.py b/src/freq03.py index 555adde..3ff5aa1 100644 --- a/src/freq03.py +++ b/src/freq03.py @@ -1,20 +1,23 @@ import re -import time +import sys from collections import defaultdict -start_time = time.time() + +if len(sys.argv) != 3: + print("Example: " + sys.argv[0] + " in.txt out.txt"); + sys.exit(1) words = defaultdict(int) pattern = re.compile(r'[a-zA-Z]+') -result = open('result.txt', 'w', encoding='utf-8', errors='ignore') -with open('pg.txt', 'r', encoding='utf-8', errors='ignore') as f: +with open(sys.argv[1], 'r', encoding='utf-8', errors='ignore') as f: for line in f: str = pattern.findall(line) for w in str: words[w.lower()] += 1 -for w in sorted(words, key=words.get, reverse=True): - print(w, words[w], file=result) +with open(sys.argv[2], 'w', encoding='utf-8', errors='ignore') as result: + for w, c in sorted(words.items(), key=lambda item: (-item[1], item[0])): + result.write('%s %d\n' % (w, c)) + -print("--- %s seconds ---" % (time.time() - start_time)) From f577a399dcab6e9e7031566764ffb0edfe415855 Mon Sep 17 00:00:00 2001 From: Maggan Date: Thu, 21 May 2020 01:17:55 +0300 Subject: [PATCH 4/4] pragma removed --- src/antifreq.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/antifreq.cpp b/src/antifreq.cpp index eb47f80..3d3384e 100644 --- a/src/antifreq.cpp +++ b/src/antifreq.cpp @@ -6,8 +6,6 @@ using namespace std; -#pragma warning (disable : 4996) - bool SortByVal(const pair &a, const pair &b) { if (a.second == b.second)