check_names.py

#!/usr/bin/env python3
#
# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  17. """
  18. This script confirms that the naming of all symbols and identifiers in Mbed TLS
  19. are consistent with the house style and are also self-consistent. It only runs
  20. on Linux and macOS since it depends on nm.
  21. It contains two major Python classes, CodeParser and NameChecker. They both have
  22. a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
  23. but the individual functions can also be used for specific needs.
  24. CodeParser makes heavy use of regular expressions to parse the code, and is
  25. dependent on the current code formatting. Many Python C parser libraries require
  26. preprocessed C code, which means no macro parsing. Compiler tools are also not
  27. very helpful when we want the exact location in the original source (which
  28. becomes impossible when e.g. comments are stripped).
  29. NameChecker performs the following checks:
  30. - All exported and available symbols in the library object files, are explicitly
  31. declared in the header files. This uses the nm command.
  32. - All macros, constants, and identifiers (function names, struct names, etc)
  33. follow the required regex pattern.
  34. - Typo checking: All words that begin with MBED exist as macros or constants.
  35. The script returns 0 on success, 1 on test failure, and 2 if there is a script
  36. error. It must be run from Mbed TLS root.
  37. """
import abc
import argparse
import fnmatch
import glob
import textwrap
import os
import sys
import traceback
import re
import enum
import shutil
import subprocess
import logging

# Naming patterns to check against. These are defined outside the NameCheck
# class for ease of modification.
MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
CONSTANTS_PATTERN = MACRO_PATTERN
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"

class Match(): # pylint: disable=too-few-public-methods
    """
    A class representing a match, together with its found position.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the line number.
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.filename = filename
        self.line = line
        self.line_no = line_no
        self.pos = pos
        self.name = name

    def __str__(self):
        """
        Return a formatted code listing representation of the erroneous line.
        """
        gutter = format(self.line_no, "4d")
        underline = self.pos[0] * " " + (self.pos[1] - self.pos[0]) * "^"

        return (
            " {0} |\n".format(" " * len(gutter)) +
            " {0} | {1}".format(gutter, self.line) +
            " {0} | {1}\n".format(" " * len(gutter), underline)
        )

class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    An abstract parent class representing a form of static analysis error.
    It extends an Abstract Base Class, which means it is not instantiable, and
    it also mandates certain abstract methods to be implemented in subclasses.
    """
    # Class variable to control the quietness of all problems
    quiet = False

    def __init__(self):
        self.textwrapper = textwrap.TextWrapper()
        self.textwrapper.width = 80
        self.textwrapper.initial_indent = " > "
        self.textwrapper.subsequent_indent = " "

    def __str__(self):
        """
        Unified string representation method for all Problems.
        """
        if self.__class__.quiet:
            return self.quiet_output()
        return self.verbose_output()

    @abc.abstractmethod
    def quiet_output(self):
        """
        The output when --quiet is enabled.
        """
        pass

    @abc.abstractmethod
    def verbose_output(self):
        """
        The default output with explanation and code snippet if appropriate.
        """
        pass

class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when an exported/available symbol in the object file
    is not explicitly declared in header files. Created with
    NameCheck.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        self.symbol_name = symbol_name
        Problem.__init__(self)

    def quiet_output(self):
        return "{0}".format(self.symbol_name)

    def verbose_output(self):
        return self.textwrapper.fill(
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
            .format(self.symbol_name))

class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when something doesn't match the expected pattern.
    Created with NameCheck.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        self.pattern = pattern
        self.match = match
        Problem.__init__(self)

    def quiet_output(self):
        return (
            "{0}:{1}:{2}"
            .format(self.match.filename, self.match.line_no, self.match.name)
        )

    def verbose_output(self):
        return self.textwrapper.fill(
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
            .format(
                self.match.filename,
                self.match.line_no,
                self.match.name,
                self.pattern
            )
        ) + "\n" + str(self.match)

class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when a word using MBED doesn't appear to be defined as
    constants nor enum values. Created with NameCheck.check_for_typos()

    Fields:
    * match: the Match object of the MBED name in question.
    """
    def __init__(self, match):
        self.match = match
        Problem.__init__(self)

    def quiet_output(self):
        return (
            "{0}:{1}:{2}"
            .format(self.match.filename, self.match.line_no, self.match.name)
        )

    def verbose_output(self):
        return self.textwrapper.fill(
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
            .format(self.match.filename, self.match.line_no, self.match.name)
        ) + "\n" + str(self.match)

class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.
    """
    def __init__(self, log):
        self.log = log
        self.check_repo_path()

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-1.3.h"]

    @staticmethod
    def check_repo_path():
        """
        Check that the current working directory is the project root, and throw
        an exception if not.
        """
        if not all(os.path.isdir(d) for d in ["include", "library", "tests"]):
            raise Exception("This script must be run from Mbed TLS root")

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        all_macros = self.parse_macros([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "tests/include/test/drivers/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        enum_consts = self.parse_enum_consts([
            "include/mbedtls/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        identifiers = self.parse_identifiers([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        mbed_words = self.parse_mbed_words([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h",
            "library/*.c",
            "3rdparty/everest/library/everest.c",
            "3rdparty/everest/library/x25519.c"
        ])
        symbols = self.parse_symbols()

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc
        identifiers_justname = [x.name for x in identifiers]
        actual_macros = []
        for macro in all_macros:
            if macro.name not in identifiers_justname:
                actual_macros.append(macro)

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        self.log.debug(" {:4} Total Macros".format(len(all_macros)))
        self.log.debug(" {:4} Non-identifier Macros".format(len(actual_macros)))
        self.log.debug(" {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug(" {:4} Identifiers".format(len(identifiers)))
        self.log.debug(" {:4} Exported Symbols".format(len(symbols)))

        return {
            "macros": actual_macros,
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "symbols": symbols,
            "mbed_words": mbed_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded."""
        # exclude_wildcards may be None. Also, consider the global exclusions.
        exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
        for pattern in exclude_wildcards:
            if fnmatch.fnmatch(path, pattern):
                return True
        return False

    def get_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the UNIX-style wildcards. While the
        check_names script is designed only for use on UNIX/macOS (due to nm),
        this function alone would work fine on Windows even with forward slashes
        in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a List of relative filepaths.
        """
        accumulator = set()

        for include_wildcard in include_wildcards:
            accumulator = accumulator.union(glob.iglob(include_wildcard))

        return list(path for path in accumulator
                    if not self.is_file_excluded(path, exclude_wildcards))

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        files = self.get_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    for macro in macro_regex.finditer(line):
                        if macro.group("macro").startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_mbed_words(self, include, exclude=None):
        """
        Parse all words in the file that begin with MBED, in and out of macros,
        comments, anything.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED.
        """
        # Typos of TLS are common, hence the broader check below than MBEDTLS.
        mbed_regex = re.compile(r"\bMBED.+?_[A-Z0-9_]*")
        exclusions = re.compile(r"// *no-check-names|#error")

        files = self.get_files(include, exclude)
        self.log.debug("Looking for MBED words in {} files".format(len(files)))

        mbed_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                for line_no, line in enumerate(fp):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings.
        """
        files = self.get_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])

        enum_consts = []
        for header_file in files:
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    # Match typedefs and brackets only when they are at the
                    # beginning of the line -- if they are indented, they might
                    # be sub-structures within structs, etc.
                    if (state == states.OUTSIDE_KEYWORD and
                            re.search(r"^(typedef +)?enum +{", line)):
                        state = states.IN_BRACES
                    elif (state == states.OUTSIDE_KEYWORD and
                          re.search(r"^(typedef +)?enum", line)):
                        state = states.IN_BETWEEN
                    elif (state == states.IN_BETWEEN and
                          re.search(r"^{", line)):
                        state = states.IN_BRACES
                    elif (state == states.IN_BRACES and
                          re.search(r"^}", line)):
                        state = states.OUTSIDE_KEYWORD
                    elif (state == states.IN_BRACES and
                          not re.search(r"^ *#", line)):
                        enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts

    IGNORED_CHUNK_REGEX = re.compile('|'.join([
        r'/\*.*?\*/', # block comment entirely on one line
        r'//.*', # line comment
        r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
    ]))

    def strip_comments_and_literals(self, line, in_block_comment):
        """Strip comments and string literals from line.

        Continuation lines are not supported.

        If in_block_comment is true, assume that the line starts inside a
        block comment.

        Return updated values of (line, in_block_comment) where:
        * Comments in line have been replaced by a space (or nothing at the
          start or end of the line).
        * String contents have been removed.
        * in_block_comment indicates whether the line ends inside a block
          comment that continues on the next line.
        """

        # Terminate current multiline comment?
        if in_block_comment:
            m = re.search(r"\*/", line)
            if m:
                in_block_comment = False
                line = line[m.end(0):]
            else:
                return '', True

        # Remove full comments and string literals.
        # Do it all together to handle cases like "/*" correctly.
        # Note that continuation lines are not supported.
        line = re.sub(self.IGNORED_CHUNK_REGEX,
                      lambda s: '""' if s.group('string') else ' ',
                      line)

        # Start an unfinished comment?
        # (If `/*` was part of a complete comment, it's already been removed.)
        m = re.search(r"/\*", line)
        if m:
            in_block_comment = True
            line = line[:m.start(0)]

        return line, in_block_comment

    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
    # The regex below is indented for clarity.
    EXCLUSION_LINES = re.compile("|".join([
        r"extern +\"C\"",
        r"(typedef +)?(struct|union|enum)( *{)?$",
        r"} *;?$",
        r"$",
        r"//",
        r"#",
    ]))

    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``.
        """
        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            for line_no, line in enumerate(header):
                line, in_block_comment = \
                    self.strip_comments_and_literals(line, in_block_comment)

                if self.EXCLUSION_LINES.match(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or open parenthesis),
                # and nothing else, high chance it's a declaration that
                # continues on the next line
                if re.search(r"^([\w\*\(]+\s+)+$", line):
                    previous_line += line
                    continue

                # If previous line seemed to start an unfinished declaration
                # (as above), concat and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if line has a space in front = heuristic to
                # skip function argument lines (highly subject to formatting
                # changes)
                if line[0] == " ":
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it
                for group in identifier.groups():
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(),
                        group))

    def parse_identifiers(self, include, exclude=None):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects with identifiers.
        """
        files = self.get_files(include, exclude)
        self.log.debug("Looking for identifiers in {} files".format(len(files)))

        identifiers = []
        for header_file in files:
            self.parse_identifiers_in_file(header_file, identifiers)

        return identifiers

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus need to halt the program. This
        is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        self.log.info("Compiling...")
        symbols = []

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/mbedtls/config.h",
            "include/mbedtls/config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            # Run make clean separately to lib to prevent unwanted behavior when
            # make is invoked with parallelism.
            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make", "lib"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                "library/libmbedcrypto.a",
                "library/libmbedtls.a",
                "library/libmbedx509.a"
            ])

            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
        except subprocess.CalledProcessError as error:
            self.log.debug(error.output)
            raise error
        finally:
            # Put back the original config regardless of there being errors.
            # Works also for keyboard interrupts.
            shutil.move(
                "include/mbedtls/config.h.bak",
                "include/mbedtls/config.h"
            )

        return symbols

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of unique symbols defined and used in any of the object
        files.
        """
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        exclusions = ("FStar", "Hacl")

        symbols = []

        # Gather all outputs of nm
        nm_output = ""
        for lib in object_files:
            nm_output += subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout

        for line in nm_output.splitlines():
            if not nm_undefined_regex.search(line):
                symbol = nm_valid_regex.search(line)
                if (symbol and not symbol.group("symbol").startswith(exclusions)):
                    symbols.append(symbol.group("symbol"))
                else:
                    self.log.error(line)

        return symbols

class NameChecker():
    """
    Representation of the core name checking operation performed by this script.
    """
    def __init__(self, parse_result, log):
        self.parse_result = parse_result
        self.log = log

    def perform_checks(self, quiet=False):
        """
        A comprehensive checker that performs each check in order, and outputs
        a final verdict.

        Args:
        * quiet: whether to hide detailed problem explanation.
        """
        self.log.info("=============")
        Problem.quiet = quiet
        problems = 0

        problems += self.check_symbols_declared_in_header()

        pattern_checks = [
            ("macros", MACRO_PATTERN),
            ("enum_consts", CONSTANTS_PATTERN),
            ("identifiers", IDENTIFIER_PATTERN)
        ]
        for group, check_pattern in pattern_checks:
            problems += self.check_match_pattern(group, check_pattern)

        problems += self.check_for_typos()

        self.log.info("=============")
        if problems > 0:
            self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
            if quiet:
                self.log.info("Remove --quiet to see explanations.")
            else:
                self.log.info("Use --quiet for minimal output.")
            return 1
        else:
            self.log.info("PASS")
            return 0

    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []

        for symbol in self.parse_result["symbols"]:
            found_symbol_declared = False

            for identifier_match in self.parse_result["identifiers"]:
                if symbol == identifier_match.name:
                    found_symbol_declared = True
                    break

            if not found_symbol_declared:
                problems.append(SymbolNotInHeader(symbol))

        self.output_check_result("All symbols in header", problems)
        return len(problems)

    def check_match_pattern(self, group_to_check, check_pattern):
        """
        Perform a check that all items of a group conform to a regex pattern.
        Assumes parse_names_in_source() was called before this.

        Args:
        * group_to_check: string key to index into self.parse_result.
        * check_pattern: the regex to check against.

        Returns the number of problems that need fixing.
        """
        problems = []

        for item_match in self.parse_result[group_to_check]:
            if not re.search(check_pattern, item_match.name):
                problems.append(PatternMismatch(check_pattern, item_match))
            # Double underscore should not be used for names
            if re.search(r".*__.*", item_match.name):
                problems.append(
                    PatternMismatch("no double underscore allowed", item_match))

        self.output_check_result(
            "Naming patterns of {}".format(group_to_check),
            problems)
        return len(problems)

    def check_for_typos(self):
        """
        Perform a check that all words in the source code beginning with MBED are
        either defined as macros, or as enum constants.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []

        # Set comprehension, equivalent to a list comprehension wrapped by set()
        all_caps_names = {
            match.name
            for match
            in self.parse_result["macros"] + self.parse_result["enum_consts"]}
        typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|"
                                    r"MBEDTLS_TEST_LIBTESTDRIVER*")

        for name_match in self.parse_result["mbed_words"]:
            found = name_match.name in all_caps_names

            # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
            # PSA driver, they will not exist as macros. However, they
            # should still be checked for typos using the equivalent
            # BUILTINs that exist.
            if "MBEDTLS_PSA_ACCEL_" in name_match.name:
                found = name_match.name.replace(
                    "MBEDTLS_PSA_ACCEL_",
                    "MBEDTLS_PSA_BUILTIN_") in all_caps_names

            if not found and not typo_exclusion.search(name_match.name):
                problems.append(Typo(name_match))

        self.output_check_result("Likely typos", problems)
        return len(problems)

    def output_check_result(self, name, problems):
        """
        Write out the PASS/FAIL status of a performed check depending on whether
        there were problems.

        Args:
        * name: the name of the test
        * problems: a List of encountered Problems
        """
        if problems:
            self.log.info("{}: FAIL\n".format(name))
            for problem in problems:
                self.log.warning(str(problem))
        else:
            self.log.info("{}: PASS".format(name))

def main():
    """
    Perform argument parsing, and create an instance of CodeParser and
    NameChecker to begin the core operation.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "This script confirms that the naming of all symbols and identifiers "
            "in Mbed TLS is consistent with the house style and is also "
            "self-consistent.\n\n"
            "Expected to be run from the Mbed TLS root directory.")
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="show parse results"
    )
    parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="hide unnecessary text, explanations, and highlights"
    )

    args = parser.parse_args()

    # Configure the global logger, which is then passed to the classes below
    log = logging.getLogger()
    log.setLevel(logging.DEBUG if args.verbose else logging.INFO)
    log.addHandler(logging.StreamHandler())

    try:
        code_parser = CodeParser(log)
        parse_result = code_parser.comprehensive_parse()
    except Exception: # pylint: disable=broad-except
        traceback.print_exc()
        sys.exit(2)

    name_checker = NameChecker(parse_result, log)
    return_code = name_checker.perform_checks(quiet=args.quiet)
    sys.exit(return_code)


if __name__ == "__main__":
    main()