-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathFast5Types.py
More file actions
executable file
·734 lines (592 loc) · 30.6 KB
/
Fast5Types.py
File metadata and controls
executable file
·734 lines (592 loc) · 30.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
#!/usr/bin/env python
# Bryan Thornlow
# Fast5Types.py
# Uses Adam Novak's yahmm.py and Jacob Schreiber's PyPore
# Adapted from Jacob Schreiber's DataTypes.py
'''
NEW TO THIS MODULE:
Fast5FileSet: A container for .fast5 files that have the same run_id. Because each .fast5
file is analogous to a File.Event object, a class is needed that is analogous to an .abf file.
This class contains a set of .fast5 files and therefore a set of Events. It acts in the same
way within the pipeline as the File object does for .abf files, which often contain multiple
Events.
=========================
COPIED FROM DATATYPES.PY:
everything else
'''
import json
import math
import re
import time
import itertools as it
from itertools import chain, izip, tee, combinations

import numpy as np
from matplotlib import pyplot as plt

from PyPore.read_abf import *
from PyPore.hmm import *
from PyPore.core import *
from PyPore.database import *
from PyPore.parsers import *
from PyPore.alignment import *
class Fast5FileSet(Segment):
    """
    Contains raw ionic current extracted from .fast5 files.

    A container for .fast5 files that share the same run_id. Each .fast5 file
    is analogous to a File.Event object, so this class plays the role that a
    File object plays for .abf files: it holds a set of files and the Events
    parsed out of them.
    """

    def __init__(self, filename, timestep, current):
        # Parallel lists, one entry per .fast5 file in the set.
        self.filenames = [filename]
        self.timesteps = [timestep]
        self.currents = [current]
        # Sampling frequency; use float division for consistency with
        # File.__init__ (second=1000./timestep). The original 1000/timestep
        # silently truncated under Python 2 integer division when timestep
        # was an int.
        self.seconds = [1000. / timestep]
        self.events = []

    def parse(self, seg):
        """
        Wrap a detected segment as an Event, converting its sample-index
        start/end/duration into seconds, and append it to this file set.
        """
        self.events.append(Event(second=seg.second, current=seg.current,
                                 start=seg.start / seg.second,
                                 end=(seg.start + seg.duration) / seg.second,
                                 duration=seg.duration / seg.second, file=self))
class Event(Segment):
    '''
    A container for the ionic current corresponding to an 'event', which means a portion of the
    file containing useful data.
    '''

    def __init__(self, current, segments=None, **kwargs):
        '''
        current: raw ionic current samples for this event.
        segments: optional list of pre-made segments; when given, the current
        is rebuilt by concatenating the per-segment currents.
        '''
        # The original default was a shared mutable list (segments=[]); None
        # is the safe sentinel with identical behavior.
        if segments is None:
            segments = []
        # If segments are provided, rebuild the full current trace from them.
        if len(segments) > 0:
            try:
                # np.concatenate requires a real sequence -- a generator raises
                # TypeError -- so collect the currents into a list first.
                current = np.concatenate([seg.current for seg in segments])
            except (AttributeError, TypeError, ValueError):
                # Best-effort fallback: meta segments carry no raw current.
                current = []
        Segment.__init__(self, current, filtered=False, segments=segments, **kwargs)

    def filter(self, order=1, cutoff=2000.):
        '''
        Performs a bessel filter on the selected data, normalizing the cutoff frequency by the
        nyquist limit based on the sampling rate.

        order: filter order passed to scipy.signal.bessel.
        cutoff: cutoff frequency (Hz), normalized here by the nyquist frequency.
        Raises TypeError when called on a metaevent, which has no raw current.
        '''
        if type(self) != Event:
            raise TypeError("Cannot filter a metaevent. Must have the current.")
        from scipy import signal
        nyquist = self.second / 2.
        (b, a) = signal.bessel(order, cutoff / nyquist, btype='low', analog=0, output='ba')
        # Zero-phase filtering so segment boundaries are not shifted in time.
        self.current = signal.filtfilt(b, a, self.current)
        self.filtered = True
        self.filter_order = order
        self.filter_cutoff = cutoff

    def parse(self, parser=SpeedyStatSplit(prior_segments_per_second=10), hmm=None):
        '''
        Ensure that the data is filtered according to a bessel filter, and then applies a
        plug-n-play state parser which must contain a .parse method. If a hmm is given, it will
        use a hmm to assist in the parsing. This occurs by segmenting the event using the parser,
        and then running the segments through the hmm, stringing together consecutive segments
        which yield the same state in the hmm. If no hmm is given, returns the raw parser
        segmentation.

        NOTE: the parser default is evaluated once at definition time, so the
        same SpeedyStatSplit instance is shared across calls (original behavior).
        '''
        self.segments = parser.parse(self.current)
        for segment in self.segments:
            segment.event = self
            segment.scale(float(self.second))

        # If using HMM-Guided Segmentation, run the segments through the HMM
        if hmm:
            # Currently only supports HMMs generated from yahmm
            assert type(hmm) is Model, "TypeError: hmm must be generated from yahmm package."

            # Find the viterbi path through the events
            logp, states = self.apply_hmm(hmm)
            second = self.second

            # Walk the state path, merging runs of consecutive segments that
            # map to the same hidden-state name into one (Meta)Segment.
            i, j, n, segments = 0, 0, len(self.segments), []
            while i < n - 1:
                if states[i][1].name != states[i+1][1].name or i == n - 2:
                    ledge = self.segments[j]
                    redge = (self.segments[i] if i < n - 2 else self.segments[-1])
                    segs = self.segments[j:i+1]

                    if self.__class__.__name__ == "MetaEvent":
                        # Duration-weighted mean and std over the merged run.
                        duration = sum(seg.duration for seg in segs)
                        mean = sum(seg.mean * seg.duration for seg in segs) / duration
                        std = math.sqrt(sum(seg.std ** 2 * seg.duration for seg in segs) / duration)
                        # states entries are (index, state) pairs everywhere
                        # else in this class, so index the tuple; the original
                        # used states[j+1].name, which fails on a tuple.
                        segments.append(MetaSegment(start=ledge.start * second,
                                                    duration=duration,
                                                    mean=mean,
                                                    std=std,
                                                    event=self,
                                                    second=self.second,
                                                    hidden_state=states[j+1][1].name))
                    else:
                        # Slice the raw current covered by the merged run.
                        s, e = int(ledge.start * second), int(redge.start * second + redge.n)
                        current = self.current[s:e]
                        segments.append(Segment(start=s,
                                                current=current,
                                                event=self,
                                                second=self.second,
                                                hidden_state=states[j+1][1].name))
                    j = i
                i += 1

            self.segments = segments
        self.state_parser = parser

    def delete(self):
        '''
        Delete all data associated with itself, including making the call on all segments if they
        exist, ensuring that all references get removed immediately.
        '''
        with ignored(AttributeError):
            del self.current
        with ignored(AttributeError):
            del self.state_parser
        for segment in self.segments:
            segment.delete()
        del self

    def apply_hmm(self, hmm, algorithm='viterbi'):
        '''
        Apply a hmm to the segments, returning the log probability and the state
        sequence. Only uses the means of the segments currently.
        '''
        return getattr(hmm, algorithm)(np.array([seg.mean for seg in self.segments]))

    def plot(self, hmm=None, cmap="Set1", algorithm='viterbi', color_cycle=['r', 'b', '#FF6600', 'g'],
             hidden_states=None, lines=False, line_kwargs={'c': 'k'}, **kwargs):
        '''
        Plot the segments, colored either according to a color cycle, or according to the colors
        associated with the hidden states of a specific hmm passed in. Accepts all arguments that
        pyplot.plot accepts, and passes them along.
        '''
        if hmm:
            if not hidden_states:
                _, hidden_states = self.apply_hmm(hmm, algorithm)
            # Keep only emitting states; silent states have no segment.
            hidden_states = filter(lambda state: not state[1].is_silent(), hidden_states)

            if isinstance(cmap, dict):
                # If you pass in a custom coloring scheme, use that.
                hmm_color_cycle = []
                for _, state in hidden_states:
                    if state.name in cmap.keys():
                        hmm_color_cycle.append(cmap[state.name])
                    elif 'else' in cmap.keys():
                        hmm_color_cycle.append(cmap['else'])
                    else:
                        hmm_color_cycle.append('k')
            else:
                cm = plt.get_cmap(cmap)
                try:
                    # If using the naming scheme of "X..." meaning a single character
                    # to indicate state type, then an integer, then parse using that.
                    # Ex: U1, U15, I17, M201, M2...
                    n = float(hmm.name.split('-')[1])
                    hmm_color_cycle = []
                    for i, state in hidden_states:
                        if state.name[0] == 'U':
                            hmm_color_cycle.append('r')
                        elif state.name[0] == 'I':
                            hmm_color_cycle.append('k')
                        else:
                            idx = float(re.sub("[^0-9]", "", state.name)) / n
                            hmm_color_cycle.append(cm(idx))
                except (AttributeError, IndexError, ValueError):
                    # If using any other naming scheme, assign a color from the colormap
                    # to each state without any ordering, since none was specified.
                    states = {hmm.states[i]: i for i in xrange(len(hmm.states))}
                    hmm_color_cycle = [cm(states[state]) for i, state in hidden_states]

        if 'color' in kwargs.keys():  # If the user has specified a scheme..
            color_arg = kwargs['color']  # Pull out the coloring scheme..

            if color_arg == 'cycle':  # Use a 4-color rotating cycle
                color = [color_cycle[i % 4] for i in xrange(self.n)]
            elif color_arg == 'hmm':  # coloring by HMM hidden state
                color = hmm_color_cycle
            elif color_arg == 'model':  # Color by the models in the HMM
                color, labels, i, new_model = [], [], 0, False
                cycle = ['b', 'r', 'c', 'k', 'y', 'm', '0.25', 'g', '0.75']
                for index, state in hidden_states:
                    if not state.is_silent():
                        color.append(cycle[i % 9])
                        # Only the first segment of each model gets a label so
                        # the legend shows one entry per model.
                        if not new_model:
                            labels.append(None)
                        new_model = False
                    elif state.name.endswith("-start"):
                        labels.append(state.name[:-6])
                        new_model = True
                        i += 1
            else:
                color = kwargs['color']
            del kwargs['color']
        else:
            color, color_arg = 'k', 'k'

        # Set appropriate labels
        if 'label' in kwargs.keys():
            # The original tested isinstance(label, str) against an undefined
            # name 'label' (NameError); the kwarg value is what was meant.
            if isinstance(kwargs['label'], str):
                labels = [kwargs['label']]
            else:
                labels = kwargs['label']
        elif color_arg != 'model':
            labels = []

        # Actually do the plotting here
        # If no segments, plot the entire event.
        if isinstance(color, str):
            plt.plot(np.arange(0, len(self.current)) / self.second,
                     self.current, color=color, **kwargs)
        # Otherwise plot them one segment at a time, colored appropriately.
        else:
            # 'label' is passed explicitly per-segment below; leaving it in
            # kwargs too would raise a duplicate-keyword TypeError.
            kwargs.pop('label', None)
            for c, segment, l in it.izip_longest(color, self.segments, labels):
                plt.plot(np.arange(0, len(segment.current)) / self.second + segment.start,
                         segment.current, color=c, label=l, **kwargs)

                # If plotting the lines, plot the line through the means
                if lines:
                    plt.plot([segment.start, segment.end], [segment.mean, segment.mean], **line_kwargs)

            # If plotting the lines, plot the transitions from one segment to another
            if lines:
                for seg, next_seg in it.izip(self.segments[:-1], self.segments[1:]):
                    plt.plot([seg.end, seg.end], [seg.mean, next_seg.mean], **line_kwargs)

        # If labels have been passed in, then add the legend.
        if len(labels) > 0:
            plt.legend()

        # Set the title to include filename and time, or just time.
        try:
            plt.title("Event at {} at {}s".format(self.file.filename, self.start))
        except AttributeError:
            plt.title("Event at {}s".format(self.start))

        plt.xlabel("Time (s)")
        plt.ylabel("Current (pA)")
        plt.ylim(self.min - 5, self.max)
        plt.xlim(0, self.duration)

    def to_meta(self):
        '''
        Convert this event into a MetaEvent in place: snapshot derived
        properties into the instance dict, drop the raw current, and convert
        all child segments too.
        '''
        # Materialize lazily-computed properties before the current is gone.
        # ('start' appeared twice in the original list; deduplicated.)
        for prop in ['mean', 'std', 'duration', 'start', 'min', 'max', 'end']:
            with ignored(AttributeError, KeyError):
                self.__dict__[prop] = getattr(self, prop)
        with ignored(AttributeError):
            del self.current
        for segment in self.segments:
            segment.to_meta()
        self.__class__ = type("MetaEvent", (MetaEvent, ), self.__dict__)

    def to_dict(self):
        '''
        Return a dict of the serializable attributes that are present on this
        event, plus the class name under 'name'.
        '''
        keys = ['mean', 'std', 'min', 'max', 'start', 'end', 'duration', 'filtered',
                'filter_order', 'filter_cutoff', 'n', 'state_parser', 'segments']
        d = {i: getattr(self, i) for i in keys if hasattr(self, i)}
        d['name'] = self.__class__.__name__
        return d

    def to_json(self, filename=None):
        '''
        Serialize the event to a JSON string, optionally also writing it to
        `filename`. Segments and the state parser are dict-ified when present.
        '''
        d = self.to_dict()
        with ignored(KeyError, AttributeError):
            d['segments'] = [seg.to_dict() for seg in d['segments']]
        with ignored(KeyError, AttributeError):
            d['state_parser'] = d['state_parser'].to_dict()
        _json = json.dumps(d, indent=4, separators=(',', ' : '))
        if filename:
            with open(filename, 'w') as out:
                out.write(_json)
        return _json

    @classmethod
    def from_json(cls, _json):
        '''
        Build an event from a JSON string, or from a path ending in ".json".
        Returns a MetaEvent when no raw current was serialized.
        '''
        if _json.endswith(".json"):
            with open(_json, 'r') as infile:
                _json = ''.join(line for line in infile)
        d = json.loads(_json)
        event = MetaSegment()
        if 'current' not in d.keys():
            event.__class__ = type("MetaEvent", (MetaEvent, ), d)
        else:
            # The original passed d['start'] positionally, which landed in the
            # `segments` parameter; it is the start time, so pass it by name.
            event = cls(d['current'], start=d['start'])
        return event

    @classmethod
    def from_segments(cls, segments):
        '''
        Build an event from a list of segments. If the segments carry raw
        current, concatenate it; otherwise build a MetaEvent from their
        duration-weighted statistics.
        '''
        try:
            current = np.concatenate([seg.current for seg in segments])
            return cls(current=current, start=0, segments=segments)
        except AttributeError:
            dur = sum(seg.duration for seg in segments)
            # Duration-weighted mean, matching the MetaEvent branch of parse().
            # The original used np.mean(...)/dur, which divides by both the
            # segment count and the duration.
            mean = sum(seg.mean * seg.duration for seg in segments) / dur
            std = np.sqrt(sum(seg.std ** 2 * seg.duration for seg in segments) / dur)
            event = cls(current=np.array([seg.mean for seg in segments]), start=0,
                        segments=segments, mean=mean, std=std)
            event.__class__ = type("MetaEvent", (Event,), event.__dict__)
            return event

    @classmethod
    def from_database(cls, database, host, password, user, AnalysisID, SerialID):
        '''
        Rebuild a (meta)event from cached segment statistics in the database.
        '''
        db = MySQLDatabaseInterface(db=database, host=host, password=password, user=user)
        EventID, start, end = db.read("SELECT ID, start, end FROM Events WHERE AnalysisID = {0} AND SerialID = {1}".format(AnalysisID, SerialID))[0]
        state_query = np.array(db.read("SELECT start, end, mean, std FROM Segments WHERE EventID = {}".format(EventID)))
        segments = [MetaSegment(start=start, end=end, mean=mean,
                                std=std, duration=end - start) for start, end, mean, std in state_query]
        # The original called Event.from_segments( cls, segments ), which
        # passed cls as the segment list and discarded the result.
        return cls.from_segments(segments)

    @property
    def n(self):
        # Number of segments currently attached to this event.
        return len(self.segments)
class File(Segment):
    '''
    A container for the raw ionic current pulled from a .abf file, and metadata as to
    the events detected in the file.
    '''

    def __init__(self, filename=None, current=None, timestep=None, **kwargs):
        '''
        Either provide `current` and `timestep` directly, or a `filename`
        pointing at a valid .abf file to read them from. Raises SyntaxError
        when neither (or both) is given.
        '''
        # Must either provide the current and timestep, or the filename
        if current is not None and timestep is not None:
            filename = ""
        elif filename and current is None and timestep is None:
            timestep, current = read_abf(filename)
            # Strip a Windows-style directory prefix and the .abf extension.
            filename = filename.split("\\")[-1].split(".abf")[0]
        else:
            raise SyntaxError( "Must provide current and timestep, or filename \
corresponding to a valid abf file." )
        # second = samples per second; timestep is presumably in ms -- TODO confirm.
        Segment.__init__(self, current=current, filename=filename, second=1000. / timestep,
                         events=[], sample=None, )

    def __getitem__(self, index):
        # Indexing a File yields its detected events.
        return self.events[index]

    def parse(self, parser=lambda_event_parser(threshold=90)):
        '''
        Applies one of the plug-n-play event parsers for event detection. The parser must have a .parse method
        which returns a tuple corresponding to the start of each event, and the ionic current in them.

        NOTE: the parser default is evaluated once at definition time, so the
        same parser instance is shared across calls (original behavior).
        '''
        self.events = [Event(current=seg.current,
                             start=seg.start / self.second,
                             end=(seg.start + seg.duration) / self.second,
                             duration=seg.duration / self.second,
                             second=self.second,
                             file=self) for seg in parser.parse(self.current)]
        self.event_parser = parser

    def close(self):
        '''
        Close the file, deleting all data associated with it. A wrapper for the delete function.
        '''
        self.delete()

    def delete(self):
        '''
        Delete the file, and everything that is a part of it, including the ionic current stored
        to it, other properties, and all events. Calls delete on all events to remove them and all
        underlying data.
        '''
        with ignored(AttributeError):
            del self.current
        with ignored(AttributeError):
            del self.event_parser
        for event in self.events:
            event.delete()
        del self

    def plot(self, limits=None, color_events=True, event_downsample=5,
             file_downsample=100, downsample=10, file_kwargs={'c': 'k', 'alpha': 0.66},
             event_kwargs={'c': 'c', 'alpha': 0.66}, **kwargs):
        '''
        Allows you to plot a file, optionally coloring the events in a file. You may also give a
        dictionary of settings to color the event by, and dictionary of settings to color the
        rest of the file by. You may also specify the downsampling for the event and the rest
        of the file separately, because otherwise it may be too much data to plot.
        '''
        step = 1. / self.second
        second = self.second

        # Allows you to only plot a certain portion of the file
        limits = limits or (0, len(self.current) * step)
        start, end = limits

        # If you want to apply special settings to the events and the rest of the file
        # separately, you need to go through each part and plot it individually.
        if color_events and self.n > 0:
            # Pick out all of the events, as opposed to non-event related current
            # in the file.
            events = [event for event in self.events if event.start > start and event.end < end]

            # If there are no events, just plot using the given settings.
            if len(events) == 0:
                # Slice bounds must be ints: start/end are seconds, so
                # start*second is a float (the original indexed with floats,
                # which fails under modern numpy/Python).
                # NOTE(review): the x-axis here is sample indices, unlike the
                # seconds-based axes below -- looks inconsistent; confirm.
                plt.plot(np.arange(start * second, end * second),
                         self.current[int(start * second):int(end * second)], **kwargs)
            else:
                # Plot the current before the first event with file settings.
                current = self.current[int(start * second):int(events[0].start * second):file_downsample]
                plt.plot(np.arange(0, len(current)) * step * file_downsample + start,
                         current, **file_kwargs)

                for i, event in enumerate(events):
                    # The event itself, downsampled with event settings..
                    si, ei = int(event.start * second), int(event.end * second)
                    current = self.current[si:ei:event_downsample]
                    plt.plot(np.arange(0, len(current)) * step * event_downsample + event.start,
                             current, **event_kwargs)

                    # ..then the gap up to the next event (or file end) with
                    # file settings.
                    si, ei = ei, int(end * second) if i == len(events) - 1 else int(events[i+1].start * self.second)
                    current = self.current[si:ei:file_downsample]
                    plt.plot(np.arange(0, len(current)) * step * file_downsample + event.end,
                             current, **file_kwargs)
        else:
            # Same int-cast fix for the slice bounds as above.
            current = self.current[int(start * second):int(end * second):downsample]
            plt.plot(np.arange(0, len(current)) * step * downsample + start, current, **kwargs)

        plt.title("File {}".format(self.filename))
        plt.ylabel("Current (pA)")
        plt.xlabel("Time (s)")
        plt.xlim(start, end)

    def to_meta(self):
        '''
        Remove the ionic current stored for this file, and do the same for all underlying
        structures in order to remove all references to that list.
        '''
        with ignored(AttributeError):
            del self.current
        for event in self.events:
            event.to_meta()

    def to_dict(self):
        '''
        Return a dictionary of the important data that underlies this file. This is done with the
        intention of producing a json from it.
        '''
        keys = ['filename', 'n', 'event_parser', 'mean', 'std', 'duration', 'start', 'end', 'events']
        # Derive 'end' when only start+duration were stored.
        if not hasattr(self, 'end') and (hasattr(self, 'start') and hasattr(self, 'duration')):
            setattr(self, 'end', self.start + self.duration)
        d = {i: getattr(self, i) for i in keys if hasattr(self, i)}
        d['name'] = self.__class__.__name__
        return d

    def to_json(self, filename=None):
        '''
        Return a json (in the form of a string) that represents the file, and allows for
        reconstruction of the instance from, using cls.from_json.
        '''
        d = self.to_dict()
        devents = []
        for event in d['events']:
            devent = event.to_dict()
            try:
                devent['segments'] = [state.to_dict() for state in devent['segments']]
                devent['state_parser'] = devent['state_parser'].to_dict()
            except (KeyError, AttributeError):
                # Event was never segmented; drop the partial keys if present.
                with ignored(KeyError, AttributeError):
                    del devent['segments']
                    del devent['state_parser']
            devents.append(devent)
        d['events'] = devents
        d['event_parser'] = d['event_parser'].to_dict()

        _json = json.dumps(d, indent=4, separators=(',', ' : '))
        if filename:
            with open(filename, 'w') as outfile:
                outfile.write(_json)
        return _json

    @classmethod
    def from_json(cls, _json):
        '''
        Read in a json (string format) and produce a file instance and all associated event
        instances.
        '''
        if _json.endswith(".json"):
            with open(_json, 'r') as infile:
                _json = ''.join(line for line in infile)
        d = json.loads(_json)

        if d['name'] != "File":
            raise TypeError("JSON does not encode a file")

        # If the .abf file is present locally, rebuild with raw current;
        # otherwise fall back to a meta-only reconstruction.
        try:
            file = File(filename=d['filename'] + ".abf")
            meta = False
        except Exception:
            file = File(current=[], timestep=1)
            meta = True

        # NOTE(review): `parser` here is presumably the PyPore parsers module
        # pulled in by a star import -- confirm it exposes from_json.
        file.event_parser = parser.from_json(json.dumps(d['event_parser']))
        file.events = []

        for _json in d['events']:
            s, e = int(_json['start'] * file.second), int(_json['end'] * file.second)
            if meta:
                event = MetaEvent(**_json)
            else:
                current = file.current[s:e]
                event = Event(current=current,
                              start=s / file.second,
                              end=e / file.second,
                              duration=(e - s) / file.second,
                              second=file.second,
                              file=file)
            if _json['filtered']:
                if not meta:
                    event.filter(order=_json['filter_order'], cutoff=_json['filter_cutoff'])
            if meta:
                event.segments = [MetaSegment(**s_json) for s_json in _json['segments']]
            else:
                event.segments = [Segment(current=event.current[int(s_json['start'] * file.second):
                                                                int(s_json['end'] * file.second)],
                                          second=file.second,
                                          event=event,
                                          **s_json)
                                  for s_json in _json['segments']]
            event.state_parser = parser.from_json(json.dumps(_json['state_parser']))
            event.filtered = _json['filtered']
            file.events.append(event)
        return file

    @classmethod
    def from_database(cls, database, host, password, user, AnalysisID=None, filename=None,
                      eventDetector=None, eventDetectorParams=None, segmenter=None,
                      segmenterParams=None, filterCutoff=None, filterOrder=None):
        '''
        Loads the cache for the file, if this exists. Can either provide the AnalysisID to unambiguously
        know which analysis to use, or the filename if you want the most recent analysis done on that file.
        '''
        db = MySQLDatabaseInterface(db=database, host=host, password=password, user=user)

        keys = ("ID", "Filename", "EventDetector", "EventDetectorParams",
                "segmenter", "segmenterParams", "FilterCutoff", "FilterOrder")
        vals = (AnalysisID, filename, eventDetector, eventDetectorParams, segmenter,
                segmenterParams, filterCutoff, filterOrder)

        # Build the WHERE clause from whichever filters were provided; string
        # columns are quoted, numeric ones are not.
        query_list = []
        for key, val in zip(keys, vals):
            if val:
                if key not in ['ID', 'FilterCutoff', 'FilterOrder']:
                    query_list += ["{key} = '{val}'".format(key=key, val=val)]
                else:
                    query_list += ["{key} = {val}".format(key=key, val=val)]
        query = "SELECT * FROM AnalysisMetadata WHERE " + " AND ".join(query_list) + " ORDER BY TimeStamp DESC"

        try:
            filename, _, AnalysisID = db.read(query)[0][0:3]
        except IndexError:
            raise DatabaseError("No analysis found with given parameters.")

        try:
            file = File(filename + ".abf")
        except Exception:
            raise IOError("File must be in local directory to parse from database.")

        # Replay the cached event boundaries, then each event's segment
        # boundaries, through MemoryParse.
        query = np.array(db.read("SELECT ID, SerialID, start, end FROM Events WHERE AnalysisID = {0}".format(AnalysisID)))
        EventID, SerialID, starts, ends = query[:, 0], query[:, 1], query[:, 2], query[:, 3]
        starts, ends = map(int, starts), map(int, ends)
        file.parse(parser=MemoryParse(starts, ends))

        for i in SerialID:
            state_query = np.array(db.read("SELECT start, end FROM Segments WHERE EventID = {}".format(EventID[i])))
            with ignored(IndexError):
                starts, ends = state_query[:, 0], state_query[:, 1]
                file.events[i].parse(parser=MemoryParse(starts, ends))
        return file

    def to_database(self, database, host, password, user):
        '''
        Caches the file to the database. This will create an entry in the AnalysisMetadata table
        for this file, and will add each event to the Event table, and each Segment to the Segment
        table. The split points are stored de facto due to the start and end parameters in the events
        and segments, and so this segmentation can be reloaded using from_database.
        '''
        db = MySQLDatabaseInterface(db=database, host=host, password=password, user=user)

        event_parser_name = self.event_parser.__class__.__name__
        event_parser_params = repr(self.event_parser)

        # Segmentation / filter settings come from the first event; fall back
        # to SQL NULL when the file was never segmented or filtered.
        try:
            state_parser_name = self.events[0].state_parser.__class__.__name__
            state_parser_params = repr(self.events[0].state_parser)
        except (AttributeError, IndexError):
            state_parser_name = "NULL"
            state_parser_params = "NULL"

        try:
            filter_order = self.events[0].filter_order
            filter_cutoff = self.events[0].filter_cutoff
        except (AttributeError, IndexError):
            filter_order = "NULL"
            filter_cutoff = "NULL"

        metadata = "'{0}',NULL,NULL,'{1}','{2}','{3}','{4}', {5}, {6}".format(self.filename,
                                                                              event_parser_name,
                                                                              event_parser_params,
                                                                              state_parser_name,
                                                                              state_parser_params,
                                                                              filter_order,
                                                                              filter_cutoff
                                                                              )

        # If an analysis with the same file + detectors already exists,
        # delete it (and its events/segments) before inserting the new one.
        try:
            prevAnalysisID = db.read("SELECT ID FROM AnalysisMetadata WHERE Filename = '{0}' AND EventDetector = '{1}' AND segmenter = '{2}'".format(
                self.filename, event_parser_name, state_parser_name))[0][0]
        except IndexError:
            prevAnalysisID = None

        if prevAnalysisID is not None:
            prevAnalysisEventIDs = db.read("SELECT ID FROM Events WHERE AnalysisID = {0}".format(prevAnalysisID))
            for ID in prevAnalysisEventIDs:
                ID = ID[0]
                db.execute("DELETE FROM Segments WHERE EventID = {0}".format(ID))
                db.execute("DELETE FROM Events WHERE ID = {0}".format(ID))
            db.execute("DELETE FROM AnalysisMetadata WHERE ID = {0}".format(prevAnalysisID))

        db.execute("INSERT INTO AnalysisMetadata VALUES({0})".format(metadata))
        # NOTE(review): 'Timestamp' vs 'TimeStamp' casing differs across
        # queries in this class -- verify against the schema.
        analysisID = db.read("SELECT ID FROM AnalysisMetadata ORDER BY Timestamp DESC")[0][0]

        for i, event in enumerate(self.events):
            # Times are stored as integer 1e-5-second ticks (start*100000).
            values = "VALUES ({0},{1},{2},{3},{4},{5},NULL)".format(int(analysisID),
                                                                    i,
                                                                    event.start * 100000,
                                                                    event.end * 100000,
                                                                    event.mean,
                                                                    event.std
                                                                    )
            db.execute("INSERT INTO Events " + values)

            event_id = db.read("SELECT ID FROM Events WHERE AnalysisID = '{0}' AND SerialID = {1}".format(analysisID, i))[-1][-1]
            for j, seg in enumerate(event.segments):
                values = "VALUES ({0},{1},{2},{3},{4},{5})".format(int(event_id),
                                                                   j,
                                                                   seg.start * 100000,
                                                                   seg.end * 100000,
                                                                   seg.mean,
                                                                   seg.std,
                                                                   )
                db.execute("INSERT INTO Segments " + values)

    @property
    def n(self):
        # Number of detected events in this file.
        return len(self.events)