# QEScripts/ViewCalc.py at master · ZGainsforth/QEScripts · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
import os
import numpy as np
import matplotlib.pyplot as plt
import re
from io import StringIO

def GetChunkFromTextFile(FileName, StartStr, StopStr, skip_header=0, skip_footer=0, LastHit=True, DataType='array'):
    """Extract the text between two regex markers in a file and convert it.

    Parameters
    ----------
    FileName : str
        Path of the text file to read.
    StartStr, StopStr : str
        Regular-expression fragments.  The chunk is the shortest text
        (newlines included) between a match of StartStr and the next match
        of StopStr.
    skip_header, skip_footer : int
        Number of lines dropped from the start / end of the chunk.  Applied
        for 'raw' and 'array'; not used for 'float'.
    LastHit : bool
        If true, use the last occurrence in the file, otherwise the first.
    DataType : str
        Case-insensitive output format:
        'array' (or 'numpy') -> result of np.genfromtxt on the chunk,
        'raw'                -> the chunk text,
        'float'              -> the chunk converted with float().

    Returns
    -------
    The converted chunk, or None when the file cannot be read or the markers
    are not found.

    Raises
    ------
    ValueError
        If DataType is not one of the supported names, or if a 'float' chunk
        is not a valid number.
    """
    DataType = DataType.lower()

    # Read the file.  A missing/unreadable file is reported and skipped.
    try:
        with open(FileName, 'r') as myfile:
            data = myfile.read()
    except (OSError, UnicodeError):
        print('Failed to open ' + FileName + '.  Skipping.')
        return None

    # This regex looks for the data between the start and stop strings.
    # re.S lets '.' span newlines so multi-line chunks are captured.
    reout = re.compile('%s(.*?)%s' % (StartStr, StopStr), re.S)
    if LastHit:
        Hits = reout.findall(data)
        if not Hits:
            # The user asked for something that isn't in the file.
            return None
        SectionStr = Hits[-1]
    else:
        Hit = reout.search(data)
        if Hit is None:
            return None
        SectionStr = Hit.group(1)

    if DataType == 'raw':
        # Apply skip_header and skip_footer line-wise, keeping line endings.
        Lines = SectionStr.splitlines(True)[skip_header:]
        if skip_footer > 0:
            Lines = Lines[:-skip_footer]
        return ''.join(Lines)

    if DataType == 'float':
        # np.float was only an alias of the builtin and was removed in
        # NumPy 1.24, so use float() directly.
        return float(SectionStr)

    if DataType in ('array', 'numpy'):
        # Convert it into a numpy array.
        return np.genfromtxt(StringIO(SectionStr), skip_header=skip_header, skip_footer=skip_footer, dtype=None)

    raise ValueError("Unknown DataType %r; expected 'array', 'numpy', 'raw' or 'float'." % DataType)

def floatme(v):
    """Convert text to a float, treating empty or whitespace-only text as 0.0."""
    Trimmed = v.strip()
    # An empty string is falsy, so blank input falls through to the default.
    return float(Trimmed) if Trimmed else 0.0

def ConvertEspressoTimeToSec(RunTime):
    """Convert a run-time string such as '3m46.35s' or '1h 5m' to seconds.

    The string is scanned for numbers immediately followed by one of the
    unit letters d (days), h (hours), m (minutes) or s (seconds).  Only the
    first value found for each unit is used, and units that do not appear
    contribute zero.

    Seconds are recognised by their own 's' suffix, so a seconds-only time
    ('11.59s') or seconds separated from the minutes by padding ('1m 2.34s')
    are counted.  Matching seconds only when they directly follow an 'm'
    would drop them in both of those cases.

    Parameters
    ----------
    RunTime : str
        Text containing the time fields.

    Returns
    -------
    float
        Total number of seconds (0.0 if no time field is found).

    Raises
    ------
    TypeError
        If RunTime is not a string (for example None because the time could
        not be extracted), so the caller can record the value as missing
        instead of as zero seconds.
    """
    SecondsPerUnit = {'d': 24 * 3600.0, 'h': 3600.0, 'm': 60.0, 's': 1.0}

    # Keep only the first occurrence of each unit.
    FirstValue = {}
    for Number, Unit in re.findall(r'(\d+(?:\.\d*)?|\.\d+)([dhms])', RunTime):
        FirstValue.setdefault(Unit, float(Number))

    # Start the sum at 0.0 so the result is a float even with no matches.
    return sum((Value * SecondsPerUnit[Unit] for Unit, Value in FirstValue.items()), 0.0)

def GenerateSingleSummary(InFilebase):
    """Collect energy, force and run time for every calculation in a series.

    Reads two files from the 'CalcSummaries' directory:
      * '<InFilebase>-BaseNames.txt' - one calculation base name per line;
        '<base name>.out' is the output file that gets parsed.
      * '<InFilebase>-XVary.txt' - first line is the x-axis label type, the
        remaining lines are the x-axis labels, one per calculation.

    Writes '<InFilebase>-Energies.txt', '-Forces.txt' and '-RunTimes.txt'
    (two columns: label, value) into 'CalcSummaries'.

    Values that cannot be extracted from an output file are stored as NaN.

    Parameters
    ----------
    InFilebase : str
        Prefix shared by the BaseNames/XVary input files and the output files.

    Returns
    -------
    dict
        Keys 'InFile', 'BaseNames', 'XLabelType', 'XLabels', 'Energies',
        'Forces' and 'RunTimes'; the last three are float numpy arrays with
        one entry per base name.

    Raises
    ------
    ValueError
        If the number of x labels differs from the number of base names.
    OSError
        If either input file cannot be opened.
    """
    # Get the base file names.
    with open(os.path.join('CalcSummaries', InFilebase + '-BaseNames.txt'), 'r') as f:
        BaseNames = [b.strip() for b in f.readlines()]

    # Get the X-axis labels.
    with open(os.path.join('CalcSummaries', InFilebase + '-XVary.txt'), 'r') as f:
        # The first line is the header.
        XLabelType = f.readline().strip()
        XLabels = [b.strip() for b in f.readlines()]

    # The two-column output below needs exactly one label per calculation;
    # fail before parsing anything rather than after all the work is done.
    if len(XLabels) != len(BaseNames):
        raise ValueError('%s: found %d x labels but %d base names.' % (InFilebase, len(XLabels), len(BaseNames)))

    # Let's get the energy, forces and runtimes out of each of the files.
    Energies = np.zeros(len(BaseNames))
    Forces = np.copy(Energies)
    RunTimes = np.copy(Energies)
    for n, FileName in enumerate(BaseNames):
        OutFile = FileName + '.out'
        try:
            Energies[n] = GetChunkFromTextFile(FileName=OutFile, StartStr=r'!\s*total energy\s*=\s*', StopStr=' Ry', DataType='float')
            Forces[n] = GetChunkFromTextFile(FileName=OutFile, StartStr=r'Total force =\s*', StopStr=r'\s*Total SCF correction', DataType='float')
        except Exception:
            # Sometimes there will be a failed computation.  If so, we leave it out.
            Energies[n] = np.nan
            Forces[n] = np.nan

        try:
            RunTime = GetChunkFromTextFile(FileName=OutFile, StartStr=r'PWSCF\s*:\s*', StopStr='CPU', DataType='raw')
            # None means the time was not found; record that as missing
            # rather than letting it be converted to zero seconds.
            RunTimes[n] = np.nan if RunTime is None else ConvertEspressoTimeToSec(RunTime)
        except Exception:
            RunTimes[n] = np.nan

    # Save two column files for each.
    # NOTE(review): 'Rybergs/Bohr' looks like a typo for 'Rydbergs/Bohr'; kept
    # as-is because the header text is part of the written file format.
    np.savetxt(os.path.join('CalcSummaries', InFilebase + '-Energies.txt'), np.vstack((XLabels, Energies.astype('|S32'))).T, header=XLabelType + ' Rydbergs', fmt='%s %s')
    np.savetxt(os.path.join('CalcSummaries', InFilebase + '-Forces.txt'), np.vstack((XLabels, Forces.astype('|S32'))).T, header=XLabelType + ' Rybergs/Bohr', fmt='%s %s')
    np.savetxt(os.path.join('CalcSummaries', InFilebase + '-RunTimes.txt'), np.vstack((XLabels, RunTimes.astype('|S32'))).T, header=XLabelType + ' Seconds', fmt='%s %s')

    # Return all the obtained data.
    SingleSummary = {
        'InFile': InFilebase,
        'BaseNames': BaseNames,
        'XLabelType': XLabelType,
        'XLabels': XLabels,
        'Energies': Energies,
        'Forces': Forces,
        'RunTimes': RunTimes,
    }

    return SingleSummary


def PlotSummary(Summary, prefix=''):
    """Draw one line plot each for energies, forces and run times.

    Parameters
    ----------
    Summary : dict
        Must provide 'XLabels', 'XLabelType', 'Energies', 'Forces' and
        'RunTimes'.  The x positions are 0..len(Summary['Energies'])-1 and
        are labelled with Summary['XLabels'].
    prefix : str
        Text placed in front of each figure title.

    Each plot goes into a new matplotlib figure; nothing is shown or saved
    here.
    """
    TickLabels = Summary['XLabels']
    Positions = range(len(Summary['Energies']))

    # (summary key, y-axis label, title suffix) for each figure, in draw order.
    Panels = (
        ('Energies', 'Rydbergs', 'Energies'),
        ('Forces', 'Rydbergs per Bohr', 'Forces'),
        ('RunTimes', 'seconds', 'Run Time'),
    )

    for Key, YLabel, TitleSuffix in Panels:
        plt.figure()
        plt.plot(Positions, Summary[Key])
        plt.xlabel(Summary['XLabelType'])
        plt.ylabel(YLabel)
        plt.xticks(Positions, TickLabels)
        plt.title(prefix + TitleSuffix)


def CalcSummary():
    """Summarise and plot one calculation series, or the difference of two.

    Reads 'CalcSummaries/BaseNames.txt'.  Its first line names the series to
    summarise.  If a non-blank second line is present, a second series is
    summarised as well and the element-wise difference (first minus second)
    of energies, forces and run times is written to
    'CalcSummaries/<first>-minus-<second>-*.txt' and plotted alongside the
    two individual series.  All figures are then displayed with plt.show().

    Raises
    ------
    ValueError
        If the two series do not contain the same number of calculations.
    """
    # Get the base file names.  If there is only one, then we will plot it.  If there are two, then we are doing differential convergence.
    with open(os.path.join('CalcSummaries', 'BaseNames.txt'), 'r') as f:
        InFile1 = f.readline().strip()
        # readline() returns '' at end of file instead of raising, so an
        # absent (or blank) second line simply means single-series mode.
        InFile2 = f.readline().strip() or None

    # Now Generate summaries from each
    Summary1 = GenerateSingleSummary(InFile1)
    if InFile2 is None:
        # If there is only one summary, then we plot it.
        PlotSummary(Summary1, prefix=InFile1 + ' ')
    else:
        # Otherwise, with two summaries, we have to plot the differential.
        Summary2 = GenerateSingleSummary(InFile2)

        # The difference is only meaningful point by point.  Without this
        # check a length-1 series would be silently broadcast against the
        # other one and zip() below would silently truncate the labels.
        if len(Summary1['Energies']) != len(Summary2['Energies']):
            raise ValueError('Cannot compare %s (%d calculations) with %s (%d calculations).' % (InFile1, len(Summary1['Energies']), InFile2, len(Summary2['Energies'])))

        # If there is a second summary, then this is a differential comparison.  Produce plots of the first minus the second.
        Energies = Summary1['Energies'] - Summary2['Energies']
        Forces = Summary1['Forces'] - Summary2['Forces']
        RunTimes = Summary1['RunTimes'] - Summary2['RunTimes']
        XLabels = [x1 + '-' + x2 for x1, x2 in zip(Summary1['XLabels'], Summary2['XLabels'])]
        BaseName = Summary1['InFile'] + '-minus-' + Summary2['InFile']
        XLabelType = Summary1['XLabelType']

        # Save two column files for each.
        np.savetxt(os.path.join('CalcSummaries', BaseName + '-Energies.txt'), np.vstack((XLabels, Energies.astype('|S32'))).T, header=XLabelType + ' Rydbergs', fmt='%s %s')
        np.savetxt(os.path.join('CalcSummaries', BaseName + '-Forces.txt'), np.vstack((XLabels, Forces.astype('|S32'))).T, header=XLabelType + ' Rybergs/Bohr', fmt='%s %s')
        np.savetxt(os.path.join('CalcSummaries', BaseName + '-RunTimes.txt'), np.vstack((XLabels, RunTimes.astype('|S32'))).T, header=XLabelType + ' Seconds', fmt='%s %s')

        DiffSummary = {
            'InFile': BaseName,
            'XLabelType': XLabelType,
            'XLabels': XLabels,
            'Energies': Energies,
            'Forces': Forces,
            'RunTimes': RunTimes,
        }

        PlotSummary(Summary1, prefix=InFile1 + ' ')
        PlotSummary(Summary2, prefix=InFile2 + ' ')
        PlotSummary(DiffSummary, prefix='Differential ')

    plt.show()

    return


# Run the summary only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    CalcSummary()