Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Read SAS sas7bdat or xport files. 

3""" 

4from pandas.io.common import stringify_path 

5 

6 

def read_sas(
    filepath_or_buffer,
    format=None,
    index=None,
    encoding=None,
    chunksize=None,
    iterator=False,
):
    """
    Read SAS files stored as either XPORT or SAS7BDAT format files.

    Parameters
    ----------
    filepath_or_buffer : str, path object or file-like object
        Any valid string path is acceptable. The string could be a URL. Valid
        URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.sas``.

        If you want to pass in a path object, pandas accepts any
        ``os.PathLike``.

        By file-like object, we refer to objects with a ``read()`` method,
        such as a file handler (e.g. via builtin ``open`` function)
        or ``StringIO``.
    format : str {'xport', 'sas7bdat'} or None
        If None, file format is inferred from file extension (``.xpt`` or
        ``.sas7bdat``, matched case-insensitively). If 'xport' or
        'sas7bdat', uses the corresponding format. The value itself is
        compared case-insensitively.
    index : identifier of index column, defaults to None
        Identifier of column that should be used as index of the DataFrame.
    encoding : str, default is None
        Encoding for text data.  If None, text data are stored as raw bytes.
    chunksize : int
        Read file `chunksize` lines at a time, returns iterator.
    iterator : bool, defaults to False
        If True, returns an iterator for reading the file incrementally.

    Returns
    -------
    DataFrame if iterator=False and chunksize=None, else SAS7BDATReader
    or XportReader

    Raises
    ------
    ValueError
        If `format` is None and `filepath_or_buffer` is not a string or path
        (the format cannot be inferred from a buffer), if the file extension
        is neither ``.xpt`` nor ``.sas7bdat``, or if `format` names an
        unknown SAS format.
    """
    if format is None:
        buffer_error_msg = (
            "If this is a buffer object rather "
            "than a string name, you must specify "
            "a format string"
        )
        filepath_or_buffer = stringify_path(filepath_or_buffer)
        if not isinstance(filepath_or_buffer, str):
            raise ValueError(buffer_error_msg)
        fname = filepath_or_buffer.lower()
        if fname.endswith(".xpt"):
            format = "xport"
        elif fname.endswith(".sas7bdat"):
            format = "sas7bdat"
        else:
            raise ValueError("unable to infer format of SAS file")

    # Normalise once so user-supplied values such as "XPORT" are accepted.
    format_name = format.lower()

    # The reader classes are imported lazily, only for the format in use.
    if format_name == "xport":
        from pandas.io.sas.sas_xport import XportReader

        reader = XportReader(
            filepath_or_buffer, index=index, encoding=encoding, chunksize=chunksize
        )
    elif format_name == "sas7bdat":
        from pandas.io.sas.sas7bdat import SAS7BDATReader

        reader = SAS7BDATReader(
            filepath_or_buffer, index=index, encoding=encoding, chunksize=chunksize
        )
    else:
        raise ValueError("unknown SAS format")

    if iterator or chunksize:
        # The caller takes ownership of the reader and must close it.
        return reader

    # Close the reader even if reading fails, so the underlying handle is
    # not leaked when the file turns out to be malformed.
    try:
        return reader.read()
    finally:
        reader.close()