Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

#!/usr/bin/env python3 

 

import numpy as np 

from pprint import pprint 

 

def yield_wells(experiments, keyword=None):
    """
    Iterate over every (experiment, condition, well) triple in the data.

    If *keyword* is given, only triples where the keyword appears in the
    experiment label, or exactly equals the condition name or the well's
    label, are produced.  With no keyword, everything is yielded.
    """
    for experiment in experiments:
        for condition, wells in experiment['wells'].items():
            for well in wells:
                relevant = (
                        keyword is None
                        or keyword in experiment['label']
                        or keyword == condition
                        or keyword == well.label
                )
                if relevant:
                    yield experiment, condition, well

 

def yield_unique_wells(experiments, keyword=None):
    """
    Like yield_wells(), but emit each well object at most once.

    A well that appears more than once in the iteration (e.g. listed under
    several conditions) is only yielded the first time it is encountered.
    """
    seen = set()
    for experiment, condition, well in yield_wells(experiments, keyword):
        if well in seen:
            continue
        seen.add(well)
        yield experiment, condition, well

 

def clear_all_processing_steps():
    """Forget every processing step registered so far."""
    global _all_processing_steps
    _all_processing_steps = []


def run_all_processing_steps(experiments):
    """Apply every registered processing step to *experiments*, in order."""
    for step in _all_processing_steps:
        step(experiments)


# Module-level registry of every ProcessingStep that has been instantiated;
# ProcessingStep.__new__() appends to it.
_all_processing_steps = []

 

 

class ProcessingStep:
    """
    A modular transformation that can be applied to flow cytometry data.

    The purpose of this class is primarily to abstract the process of iterating
    through the data structure created by load_experiments().  Each experiment
    can contain a number of flow cytometry data frames.  Iterating through all
    of them to apply a common transformation is common enough that it was worth
    supporting with a bit of a framework, and that's what this class is.
    """

    def __new__(cls, *args, **kwargs):
        """
        Keep track of all the processing steps that get instantiated.  This
        functionality is required by run_all_processing_steps().
        """
        # Implement __new__() instead of __init__() because it's less likely
        # that subclasses will overwrite __new__() and forget to call this
        # method.
        #
        # Bug fix: accept (and ignore) constructor arguments.  Subclasses
        # define __init__() methods that take arguments, and those arguments
        # are also passed to __new__() — the previous signature __new__(cls)
        # raised TypeError for any subclass constructed with arguments.
        step = super().__new__(cls)
        _all_processing_steps.append(step)
        return step

    def __call__(self, experiments):
        """
        Apply this processing step to all of the given experiments.

        The actual processing is delegated to process_experiment(), which can
        be overwritten by subclasses.  The default process_experiment() calls
        process_well() on each well, which nicely abstracts the process of
        iterating through the experiment data structure.
        """
        for experiment in experiments:
            self.process_experiment(experiment)

    def process_experiment(self, experiment):
        """
        Iterate over all the wells in the given experiment.

        The processing of each well is delegated to process_well(), which is an
        abstract method.  If process_well() returns a data frame, it replaces
        the existing well data.  If process_well() returns None, it is assumed
        that the well data was modified in place.
        """
        for condition, wells in experiment['wells'].items():
            for well in wells:
                processed_data = self.process_well(experiment, well)
                if processed_data is not None:
                    # `well` is the same object stored in the experiment, so
                    # assigning to it updates the data structure in place.
                    well.data = processed_data

    def process_well(self, experiment, well):   # (abstract)
        """
        Process the data from an individual well in any way.

        This method can either return a new data frame, which will replace the
        existing one for the given well, or it can just modify the given well
        in place.

        Raises NotImplementedError unless overridden by a subclass.
        """
        raise NotImplementedError(self.__class__.__name__)

 

 

class KeepRelevantChannels(ProcessingStep):
    """
    Discard any channels that aren't explicitly listed.

    This is just useful for making processing a little faster and output a
    little cleaner if you collected data for more channels than you needed to,
    for whatever reason.
    """

    def __init__(self, channels=None):
        # Bug fix: the argument used to be discarded (`self.channels = None`),
        # which silently turned this step into a no-op.  Store the list of
        # channels to keep; None means "keep everything".
        self.channels = channels

    def process_well(self, experiment, well):
        """Return the well data restricted to the listed channels."""
        return well.data.reindex(columns=self.channels)

 

 

class LogTransformation(ProcessingStep):
    """
    Replace the listed channels with their base-10 logarithms, in place.
    """

    def __init__(self, channels=None):
        # An empty list means "transform nothing"; None is normalized to it.
        self.channels = [] if not channels else channels

    def process_well(self, experiment, well):
        data = well.data
        for channel in self.channels:
            data[channel] = np.log10(data[channel])

 

 

class GatingStep(ProcessingStep):
    """
    Base class for steps that remove (gate out) events from each well.

    Subclasses implement gate(), which returns a boolean mask selecting the
    events to discard, or None to leave the well untouched.
    """

    def process_well(self, experiment, well):
        mask = self.gate(experiment, well)
        if mask is None:
            return None
        doomed = well.data.index[mask]
        return well.data.drop(doomed)

    def gate(self, experiment, well):   # (abstract)
        raise NotImplementedError

 

 

class GateNonPositiveEvents(GatingStep):
    """
    Gate out any event with a non-positive signal in a relevant channel.

    Useful before a log transformation, which is undefined for values <= 0.
    """

    def __init__(self, channels=None):
        # Bug fix: the argument used to be discarded (`self.channels = None`),
        # so the step always fell back to gating on every channel.  Store the
        # channels to check; None means "check all channels".
        self.channels = channels

    def gate(self, experiment, well):
        """Return a mask of events that are <= 0 in any checked channel."""
        channels = self.channels or well.data.columns
        masks = [well.data[channel] <= 0 for channel in channels]
        # An event is gated if it is non-positive in ANY of the channels.
        return np.any(np.vstack(masks), axis=0)

 

 

class GateSmallCells(GatingStep):
    """
    Gate out the smallest cells, as judged by a combined scatter metric.

    A size estimate is built from the forward- and side-scatter channels
    (FSC-A plus SSC-A scaled by the slope of a linear fit between the two),
    and events below the *threshold* percentile of that estimate are
    discarded.
    """

    def __init__(self, threshold=40, save_size_col=False):
        # threshold: percentile (0-100) of the size metric below which events
        # are gated out.
        self.threshold = threshold
        # save_size_col: if True, keep the computed size metric as an extra
        # column on the well data for later inspection.
        self.save_size_col = save_size_col

    def gate(self, experiment, well):
        # Imported lazily so scipy is only required if this step is used.
        from scipy.stats import linregress

        fsc = well.data['FSC-A']
        ssc = well.data['SSC-A']
        slope = linregress(fsc, ssc)[0]
        size_metric = fsc + slope * ssc
        if self.save_size_col:
            well.data['FSC-A + m * SSC-A'] = size_metric
        return size_metric < np.percentile(size_metric, self.threshold)

 

 

class GateEarlyEvents(GatingStep):
    """
    Gate out events recorded during the first few seconds of acquisition.

    Anything collected before *throwaway_secs* seconds have elapsed is
    discarded.
    """

    def __init__(self, throwaway_secs=2):
        self.throwaway_secs = throwaway_secs

    def gate(self, experiment, well):
        # Convert the raw 'Time' channel into seconds using the cytometer's
        # $TIMESTEP metadata keyword.
        timestep = float(well.meta['$TIMESTEP'])
        elapsed_secs = well.data['Time'] * timestep
        return elapsed_secs < self.throwaway_secs