5from clang.cindex
import Index, CursorKind, TypeKind, Config
39 key = (cursor.kind, getattr(cursor.extent,
'start',
None)
and cursor.extent.start.offset,
40 getattr(cursor.extent,
'end',
None)
and cursor.extent.end.offset, cursor.spelling)
46 for ch
in cursor.get_children():
47 if ch.spelling == name
and ch.kind
in (CursorKind.PARM_DECL, CursorKind.VAR_DECL):
265 for var
in pointer_vars:
267 s = re.sub(
r'\b' + re.escape(var) +
r"\s*==\s*NULL\b",
ReplEq(var, lit_ranges, call_ranges), s)
268 s = re.sub(
r'\bNULL\s*==\s*' + re.escape(var) +
r"\b",
ReplEq(var, lit_ranges, call_ranges), s)
270 s = re.sub(
r'\b' + re.escape(var) +
r"\s*!=\s*NULL\b",
ReplNeq(var, lit_ranges, call_ranges), s)
271 s = re.sub(
r'\bNULL\s*!=\s*' + re.escape(var) +
r"\b",
ReplNeq(var, lit_ranges, call_ranges), s)
275 for var
in pointer_vars:
277 pat =
r'(?<![=])!\s*' + re.escape(var) +
r'\b(?!\s*(?:->|\.|\[|\())'
278 s = re.sub(pat,
ReplNotVar(var, lit_ranges, call_ranges), s)
282 for var
in pointer_vars:
285 for m
in re.finditer(
r'\b' + re.escape(var) +
r'\b', s):
286 start, end = m.start(), m.end()
294 before = s[
max(0, start - 10):start]
295 after = s[end:end + 10]
297 if re.search(
r'(->|\.)\s*$', before)
or re.match(
r'\s*(->|\.|\[)', after):
300 if re.search(
r'!\s*$', before):
303 prefix = s[
max(0, start - 16):start]
304 if 'IS_NULL_PTR' in prefix
or '!IS_NULL_PTR' in prefix:
307 if re.match(
r"\s*(?:==|!=|<=|>=|<|>)", s[end:]):
308 mright = re.match(
r"\s*(?:==|!=|<=|>=|<|>)\s*NULL\b", s[end:])
311 if re.search(
r"(?:==|!=|<=|>=|<|>)\s*$", s[
max(0, start - 10):start]):
312 mleft = re.search(
r"\bNULL\s*(?:==|!=|<=|>=|<|>)\s*$", s[
max(0, start - 30):start])
316 if re.search(
r'\*\s*$', before):
318 out.append((start, end))
324 for (start, end)
in out:
325 parts.append(s[last:start])
326 parts.append(f
'!IS_NULL_PTR({var})')
328 parts.append(s[last:])
354 """Collect identifier names that appear as arguments of any call-like token sequence
355 inside cond_node by tokenizing the source via libclang. This handles macros
356 and other call-like constructs that do not appear as CALL_EXPR in the AST.
358 tu = cond_node.translation_unit
360 tokens = list(tu.get_tokens(extent=cond_node.extent))
364 while i < len(tokens) - 1:
368 kind_name = getattr(t.kind,
'name',
None)
369 if kind_name ==
'IDENTIFIER' and getattr(nxt,
'spelling',
'') ==
'(':
376 while j < len(tokens):
377 s = tokens[j].spelling
386 kname = getattr(tokens[j].kind,
'name',
None)
387 if kname ==
'IDENTIFIER':
388 content_idents.append(s)
390 for ident
in content_idents:
417 if kind == CursorKind.BINARY_OPERATOR:
419 tokens = list(tu.get_tokens(extent=n.extent))
420 ops = [t.spelling
for t
in tokens
if t.spelling
in (
'==',
'!=')]
425 children = list(n.get_children())
429 if ch.kind == CursorKind.DECL_REF_EXPR:
430 ref = getattr(ch,
'referenced',
None)
436 if decl_child
and null_found:
437 var_text =
node_text(src_bytes, decl_child).strip()
439 new = f
'IS_NULL_PTR({var_text})'
441 new = f
'!IS_NULL_PTR({var_text})'
442 start = n.extent.start.offset
443 end = n.extent.end.offset
444 edits.append((start, end, new.encode(
'utf-8'),
node_text(src_bytes, n), new))
448 if kind == CursorKind.UNARY_OPERATOR:
449 tokens = list(tu.get_tokens(extent=n.extent))
450 if tokens
and tokens[0].spelling ==
'!':
451 children = list(n.get_children())
452 if len(children) == 1
and children[0].kind == CursorKind.DECL_REF_EXPR:
453 ref = getattr(children[0],
'referenced',
None)
455 var_text =
node_text(src_bytes, children[0]).strip()
456 new = f
'IS_NULL_PTR({var_text})'
457 start = n.extent.start.offset
458 end = n.extent.end.offset
459 edits.append((start, end, new.encode(
'utf-8'),
node_text(src_bytes, n), new))
463 if kind == CursorKind.DECL_REF_EXPR:
464 ref = getattr(n,
'referenced',
None)
467 if parent
is not None and parent.kind
in (CursorKind.CALL_EXPR, CursorKind.MEMBER_REF_EXPR, CursorKind.ARRAY_SUBSCRIPT_EXPR):
469 elif parent
is not None and parent.kind == CursorKind.BINARY_OPERATOR:
473 var_text =
node_text(src_bytes, n).strip()
474 new = f
'!IS_NULL_PTR({var_text})'
475 start = n.extent.start.offset
476 end = n.extent.end.offset
477 edits.append((start, end, new.encode(
'utf-8'),
node_text(src_bytes, n), new))
479 for ch
in n.get_children():
484 text =
get_text(src_bytes, node.extent)
491 CursorKind.BINARY_OPERATOR,
492 CursorKind.UNARY_OPERATOR,
493 CursorKind.DECL_REF_EXPR,
494 CursorKind.CALL_EXPR,
495 CursorKind.PAREN_EXPR,
496 CursorKind.COMPOUND_STMT,
498 for c
in list(node.get_children()):
500 if not (c.extent.start.offset >= node.extent.start.offset
and c.extent.end.offset <= node.extent.end.offset):
503 if c.kind
in expr_kinds
or c.kind.is_expression():
509 if cond_node
is not None:
511 start = cond_node.extent.start.offset
513 window_start =
max(node.extent.start.offset, start - 256)
514 prefix = src_bytes[window_start:start].decode(
'utf-8',
'replace')
515 idx = prefix.rfind(
'(')
522 end = cond_node.extent.end.offset
523 window_end =
min(node.extent.end.offset, end + 256)
524 suffix = src_bytes[end:window_end].decode(
'utf-8',
'replace')
525 if ')' not in suffix:
531 if cond_node
is not None:
533 cond_text =
node_text(src_bytes, cond_node)
536 for m
in re.finditer(
r"[A-Za-z_]\w*", cond_text):
539 idents.append(m.group(0))
540 skip = {NULL_NAME,
'IS_NULL_PTR',
'sizeof'}
544 func = cond_node.semantic_parent
545 while func
is not None and func.kind != CursorKind.FUNCTION_DECL:
546 func = func.semantic_parent
548 tu = node.translation_unit
552 tu_key = id(tu.cursor)
553 if tu_key
not in _global_decls_cache:
555 global_decls = _global_decls_cache[tu_key]
560 decl = func_decls.get(ident)
562 decl = global_decls.get(ident)
567 call_arg_names = set()
572 call_arg_names.update(token_args)
575 pointer_vars = sorted(x
for x
in found
if x
not in call_arg_names)
580 if new_cond != cond_text:
581 start = cond_node.extent.start.offset
582 end = cond_node.extent.end.offset
583 edits.append((start, end, new_cond.encode(
'utf-8'), cond_text, new_cond))
586 print(
"[IF]", path,
"\n", text)
587 print(
"[IFCOND]", path,
"\n", cond_text)
588 print(
"[IFVARS]", path,
"\n", pointer_vars)
592 index = Index.create()
593 tu = index.parse(str(path), args=[
"-x",
"c",
"-std=c11"])
596 src_bytes = path.read_bytes()
597 src_text = src_bytes.decode(
"utf-8",
"replace")
599 visit(tu.cursor, src_bytes, path, edits)
603 edits.sort(key=
lambda e: e[0], reverse=
True)
605 for (start, end, new_bytes, old_text, new_text)
in edits:
606 before = new_src[:start]
607 after = new_src[end:]
608 new_src = before + new_bytes + after
609 print(
"[EDIT]", path,
"start=", start,
"end=", end,
"->", len(new_bytes),
"bytes",
"\n-", old_text,
"\n+", new_text)
610 path.write_bytes(new_src)
611 print(
"[MODIFIED]", path)
614def visit(node, src_bytes, path, edits):
616 if node.kind == CursorKind.IF_STMT:
620 for child
in node.get_children():
621 visit(child, src_bytes, path, edits)