Coverage for cc_modules/cc_nlp.py : 15%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
3"""
4camcops_server/cc_modules/cc_nlp.py
6===============================================================================
8 Copyright (C) 2012-2020 Rudolf Cardinal (rudolf@pobox.com).
10 This file is part of CamCOPS.
12 CamCOPS is free software: you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or
15 (at your option) any later version.
17 CamCOPS is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
25===============================================================================
27**Natural language processing functions (of sorts).**
29"""
31from typing import Dict
33# =============================================================================
34# Processing names
35# =============================================================================
# Honorifics recognised as a name prefix. Entries are upper case and carry no
# full stops; they are compared against the upper-cased leading word of a name.
TITLES = ["DR", "PROF", "MR", "MISS", "MRS", "MS", "SR"]
def guess_name_components(s: str, uppercase: bool = True) -> Dict[str, str]:
    """
    Takes a string such as 'Dr James Smith' and splits it into name parts.

    This will not be perfect! If it isn't reasonably sure, it returns
    everything (after any recognised prefix) in the surname field.

    Rules applied, in order:

    1. The string is split on whitespace; empty/``None`` input gives no parts.
    2. The first word is taken as the prefix if it contains a full stop
       (e.g. ``Dr.``) or if, upper-cased, it is one of ``TITLES``.
    3. If exactly two words remain they are read as ``FORENAME SURNAME``, or
       as ``SURNAME, FORENAME`` when the first word ends in a comma (the comma
       itself is not part of the returned surname).
    4. Otherwise all remaining words are joined with single spaces and
       returned as the surname.

    Examples it will fail on:

    - Nurse Specialist Jones
    - Dr James T. Smith, M.D. (more than two words after the prefix, so
      everything ends up in the surname)

    Args:
        s: the full name to analyse
        uppercase: convert all returned components to upper case?

    Returns:
        dict: dictionary with keys "surname", "forename", "prefix"

    """
    # Hard.
    # http://stackoverflow.com/questions/4276905/

    prefix = ""
    forename = ""

    # 1. Separate on any whitespace (spaces, tabs, newlines); str.split()
    #    with no argument also discards empty fields.
    parts = s.split() if s else []

    # 2. Prefix?
    if parts:
        first = parts[0]
        # The TITLES lookup is only reached when "first" has no full stop, so
        # there is nothing to strip before comparing.
        if "." in first or first.upper() in TITLES:
            prefix = first
            parts = parts[1:]

    # 3. Forename, surname
    if len(parts) == 2:
        if parts[0].endswith(","):  # SURNAME, FORENAME
            # Drop the separating comma so it does not leak into the surname.
            surname = parts[0].rstrip(",")
            forename = parts[1]
        else:  # FORENAME SURNAME
            forename, surname = parts
    else:  # No idea, really; shove it all in the surname component.
        surname = " ".join(parts)

    if uppercase:
        surname = surname.upper()
        forename = forename.upper()
        prefix = prefix.upper()
    return dict(
        surname=surname,
        forename=forename,
        prefix=prefix,
    )