-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocess.py
More file actions
63 lines (49 loc) · 1.88 KB
/
process.py
File metadata and controls
63 lines (49 loc) · 1.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import json
# Load the word list: one word per line, line endings stripped.
# NOTE(review): this uses the platform default encoding, while
# en_wikt_words_1_5-5.txt is read as UTF-8 further down -- confirm that
# words.txt is plain ASCII.
with open('words.txt') as words_file:
    # The context manager closes the handle as soon as the read finishes.
    words = words_file.read().splitlines()

# Count how often each character occurs across the whole word list.
freqs = {}
for word in words:
    for c in word:
        freqs[c] = freqs.get(c, 0) + 1

# (count, character) pairs ordered from most to least frequent; equal counts
# are ordered by character, descending, because the tuples are compared
# element by element.
tuples = sorted(((count, char) for char, count in freqs.items()), reverse=True)
# Every prime up to 1031, in ascending order. The character table built
# further down hands each character six consecutive entries from this list.
primes = [
    2, 3, 5, 7, 11, 13, 17, 19, 23, 29,
    31, 37, 41, 43, 47, 53, 59, 61, 67, 71,
    73, 79, 83, 89, 97, 101, 103, 107, 109, 113,
    127, 131, 137, 139, 149, 151, 157, 163, 167, 173,
    179, 181, 191, 193, 197, 199, 211, 223, 227, 229,
    233, 239, 241, 251, 257, 263, 269, 271, 277, 281,
    283, 293, 307, 311, 313, 317, 331, 337, 347, 349,
    353, 359, 367, 373, 379, 383, 389, 397, 401, 409,
    419, 421, 431, 433, 439, 443, 449, 457, 461, 463,
    467, 479, 487, 491, 499, 503, 509, 521, 523, 541,
    547, 557, 563, 569, 571, 577, 587, 593, 599, 601,
    607, 613, 617, 619, 631, 641, 643, 647, 653, 659,
    661, 673, 677, 683, 691, 701, 709, 719, 727, 733,
    739, 743, 751, 757, 761, 769, 773, 787, 797, 809,
    811, 821, 823, 827, 829, 839, 853, 857, 859, 863,
    877, 881, 883, 887, 907, 911, 919, 929, 937, 941,
    947, 953, 967, 971, 977, 983, 991, 997, 1009, 1013,
    1019, 1021, 1031,
]

# Extra factor multiplied into a word's number once per distinct letter,
# indexed by how many times that letter occurs in the word. Letters seen
# once contribute 1; only counts 0 through 3 have an entry.
# NOTE(review): the two large constants are presumably big primes chosen to
# avoid clashing with the table above -- confirm their origin.
dups = [
    1,
    1,
    4800648691259650729,
    93445708553524800143627380012419329729,
]
# Give every character its own run of six consecutive primes, walking the
# frequency ranking in order so the first-ranked character gets the smallest
# primes. The first prime of a run is stored under 'yellow'; the next five
# are stored under 'green' and are later selected by letter position.
# NOTE(review): the key names look like Wordle-style hint colours -- confirm.
charDict = {}
for rank, (_, ch) in enumerate(tuples):
    start = 6 * rank
    charDict[ch] = {
        'yellow': primes[start],
        'green': primes[start + 1:start + 6],
    }

# Echo the frequency ranking to stdout.
print(tuples)

# Persist the character-to-primes table as JSON.
with open('charDict.json', 'w') as f:
    json.dump(charDict, f)
# Build a word -> integer lookup from the frequency file. Each line is split
# on single spaces: field 0 is used as the word and field 2 is parsed as an
# integer; field 1 is ignored.
# NOTE(review): field 2 is presumably a usage count -- confirm against the
# data file's format. A line with fewer than three fields raises IndexError.
wfreqs = {}
with open('en_wikt_words_1_5-5.txt', encoding="utf8") as wf_file:
    # The context manager guarantees the handle is closed after reading.
    wf = wf_file.read().splitlines()
for fr in wf:
    fields = fr.split(' ')
    wfreqs[fields[0]] = int(fields[2])
# Encode every word as one integer and write the results, sorted, to
# wordList.txt as "number,frequency,word" lines.
#
# A word's number is the product, over each letter position, of that letter's
# position-specific 'green' prime and its 'yellow' prime, multiplied by one
# dups[...] factor per distinct letter chosen by how often the letter occurs.
#
# With the tables as defined in this file, a word longer than five characters
# or a letter occurring more than three times in one word raises IndexError.
wordList = []
for word in words:
    num = 1
    freq = {}  # occurrences of each letter within this word
    for i, c in enumerate(word):
        freq[c] = freq.get(c, 0) + 1
        entry = charDict[c]
        num *= entry['green'][i] * entry['yellow']
    for count in freq.values():
        num *= dups[count]
    # Words missing from the frequency table get a frequency of 0.
    wordList.append((num, wfreqs.get(word, 0), word))

# Tuples sort by number first, then frequency, then the word itself.
wordList.sort()

# The context manager closes the output file even if a write fails.
with open("wordList.txt", "w") as f:
    for num, wfreq, word in wordList:
        f.write(",".join((str(num), str(wfreq), word)) + "\n")