Coverage for cc_modules/cc_nlp.py: 12%
25 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-15 14:23 +0100
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-15 14:23 +0100
1"""
2camcops_server/cc_modules/cc_nlp.py
4===============================================================================
6 Copyright (C) 2012, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CamCOPS.
11 CamCOPS is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CamCOPS is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26**Natural language processing functions (of sorts).**
28"""
30from typing import Dict
32# =============================================================================
33# Processing names
34# =============================================================================
# Honorifics accepted as a leading name prefix. guess_name_components() tests
# the first word against this list after removing full stops and converting
# to upper case, so entries are stored in upper case without punctuation.
TITLES = [
    "DR",
    "PROF",
    "MR",
    "MISS",
    "MRS",
    "MS",
    "SR",
]
def guess_name_components(s: str, uppercase: bool = True) -> Dict[str, str]:
    """
    Takes a string such as 'Dr James T. Smith, M.D.' and returns parts.

    This will not be perfect! If it isn't reasonably sure, it returns
    everything in the surname field.

    Layouts recognized once an optional leading prefix has been removed:

    - ``FORENAME SURNAME``
    - ``SURNAME, FORENAME`` (the separating comma is not kept in the surname)

    Anything else (one word, or three or more words) is returned whole in
    the surname field.

    Examples it will fail on:

    - Nurse Specialist Jones

    Args:
        s: the name to split; a false value (e.g. an empty string) yields
            empty strings for every component
        uppercase: convert every returned component to upper case?

    Returns:
        dict: dictionary with keys "surname", "forename", "prefix"

    """
    # Hard.
    # http://stackoverflow.com/questions/4276905/

    prefix = ""
    forename = ""

    # 1. Separate on spaces, chucking any blanks
    parts = [p for p in s.split(" ") if p] if s else []

    # 2. Prefix? The first word counts as a prefix if it contains a full stop
    #    (e.g. "Dr.") or if it is a known title. The title lookup only runs
    #    for words without a full stop, so no punctuation needs stripping.
    if parts:
        first = parts[0]
        if "." in first or first.upper() in TITLES:
            prefix = first
            parts = parts[1:]

    # 3. Forename, surname
    if len(parts) == 2:
        if parts[0].endswith(","):  # SURNAME, FORENAME
            # The comma only marks the layout; it is not part of the name.
            surname = parts[0].rstrip(",")
            forename = parts[1]
        else:  # FORENAME SURNAME
            forename, surname = parts
    else:  # No idea, really; shove it all in the surname component.
        surname = " ".join(parts)

    if uppercase:
        surname = surname.upper()
        forename = forename.upper()
        prefix = prefix.upper()
    return dict(surname=surname, forename=forename, prefix=prefix)