mirror of
https://github.com/clearlinux/clr-avx-tools.git
synced 2026-05-13 10:23:43 +00:00
Golang binaries have been observed to contain function names with many other ASCII characters not captured by the previous regex. Adjust that regex by permitting arbitrary characters for the function name component of the line (between the ` <` and `>:`) and declaring the expected content for the rest of the line. Signed-off-by: Patrick McCarty <patrick.mccarty@intel.com>
348 lines
12 KiB
Python
348 lines
12 KiB
Python
"""
|
|
avxjudge.py is a tool that RPM builds use when building for AVX2 or AVX512
optimisations. It attempts to heuristically guess whether the library file has
enough AVX instructions to be worth keeping.
|
|
"""
|
|
#!/usr/bin/python3
|
|
import subprocess
|
|
import sys
|
|
import re
|
|
import argparse
|
|
import os
|
|
|
|
# MMX and SSE2 packed-integer mnemonics.  is_sse() scores any of these at 1.0
# when the instruction operates on an %xmm register.
sse_instructions_xmm = {
    "paddb", "paddd", "paddsb", "paddsw", "paddusb", "paddusw", "paddw",
    "pmaddwd", "pmulhw", "pmullw",
    "psubb", "psubd", "psubsb", "psubsw", "psubusb", "psubusw", "psubw",
}
|
|
|
|
# 0.1 value instructions: is_avx2() credits each of these with 0.1 regardless
# of the registers involved.
avx2_instructions_lv = {"movbe", "rorx", "shlx", "shrx"}

# Arithmetic mnemonics that is_avx2() scores at 1.0 when they touch a %ymm
# register; is_avx512() consults the same table for %zmm operands.
avx2_instructions_ymm = {
    "vaddpd", "vaddps", "vmulpd", "vmulps", "vsubpd", "vsubps",
    "vpaddd", "vpaddq", "vpsubd", "vpsubq",
    "vpmaxsd", "vpmaxsq", "vpmaxsw", "vpmaxud",
    "vpminsd", "vpminsq", "vpminsw", "vpminud",
    "vpand", "vpor", "vpmulld", "vpmuludq",
}

# 0.1 value AVX512 instructions; the table is currently empty.
avx512_instructions_lv = set()
|
|
|
|
# 1.0 value instructions: scalar fused multiply-add family, one operation
# per row (all three operand orders, single and double precision).
avx2_instructions = {
    "vfmadd132ss", "vfmadd213ss", "vfmadd231ss", "vfmadd132sd", "vfmadd213sd", "vfmadd231sd",
    "vfmsub132ss", "vfmsub213ss", "vfmsub231ss", "vfmsub132sd", "vfmsub213sd", "vfmsub231sd",
    "vfnmadd132ss", "vfnmadd213ss", "vfnmadd231ss", "vfnmadd132sd", "vfnmadd213sd", "vfnmadd231sd",
    "vfnmsub132ss", "vfnmsub213ss", "vfnmsub231ss", "vfnmsub132sd", "vfnmsub213sd", "vfnmsub231sd",
}

# 1.0 value AVX512 instructions (mask-register moves, shifts and compares).
avx512_instructions = {"kmovb", "kmovw", "kshiftrw", "vpcmpltd"}
|
|
|
|
# 2.0 value instructions: carry-less multiply, packed fused multiply-add
# family (one operation per row) and packed double-precision divide.
avx2_instructions_hv = {
    "vpclmulhqlqdq", "vpclmullqhqdq",
    "vfmadd132ps", "vfmadd213ps", "vfmadd231ps", "vfmadd132pd", "vfmadd213pd", "vfmadd231pd",
    "vfmsub132ps", "vfmsub213ps", "vfmsub231ps", "vfmsub132pd", "vfmsub213pd", "vfmsub231pd",
    "vfnmadd132ps", "vfnmadd213ps", "vfnmadd231ps", "vfnmadd132pd", "vfnmadd213pd", "vfnmadd231pd",
    "vfnmsub132ps", "vfnmsub213ps", "vfnmsub231ps", "vfnmsub132pd", "vfnmsub213pd", "vfnmsub231pd",
    "vdivpd",
}

# 2.0 value AVX512 instructions; the table is currently empty.
avx512_instructions_hv = set()
|
|
|
|
# Minimum thresholds for keeping libraries.  RecordKeeper.should_delete()
# reports a file as removable only when its instruction count for the chosen
# instruction set is below min_count AND its score is at most min_score.
min_count = 10
min_score = 1.0

# Non-zero turns on the extra "duplicate count" diagnostics (set by --debug).
debug = 0
|
|
|
|
class FunctionRecord():
    """Running tally for the function currently being disassembled.

    Attributes:
        name: symbol name taken from the objdump function header ("" until
            a header has been seen).
        instructions: number of instruction lines counted for the function.
        counts: per instruction set ("sse", "avx2", "avx512") number of
            instructions that received a score.
        scores: per instruction set accumulated score.
    """

    def __init__(self):
        kinds = ("sse", "avx2", "avx512")
        self.name = ""
        self.instructions = 0
        self.counts = dict.fromkeys(kinds, 0)
        self.scores = dict.fromkeys(kinds, 0.0)
|
|
|
|
|
|
class RecordKeeper():
    """File-wide accumulator of per-function SSE/AVX2/AVX512 statistics.

    Attributes:
        total_counts / total_scores: file totals per instruction set.
        functions: per instruction set, function name -> score.
        ratios: per instruction set, function name -> percentage of the
            function's instructions that were scored for that set.
        function_record: the FunctionRecord currently being filled in.
        delete_type: instruction set ("sse", "avx2", "avx512") whose totals
            decide deletion, or an empty string to never delete.
    """

    def __init__(self, delete_type):
        kinds = ("sse", "avx2", "avx512")
        self.delete_type = delete_type
        self.function_record = FunctionRecord()
        self.total_counts = dict.fromkeys(kinds, 0)
        self.total_scores = dict.fromkeys(kinds, 0.0)
        self.functions = {kind: {} for kind in kinds}
        self.ratios = {kind: {} for kind in kinds}

    def should_delete(self) -> bool:
        """Return True when the totals for delete_type fall under both thresholds."""
        kind = self.delete_type
        if not kind:
            # No deletion mode selected: the file is always kept.
            return False
        return self.total_counts[kind] < min_count and self.total_scores[kind] <= min_score

    def finalize_function_attrs(self):
        """Fold the current function_record into the per-file tables and totals.

        Instruction sets for which the function scored no instruction are
        skipped.  The caller must only invoke this for a record with at least
        one counted instruction, since the ratio divides by that number.
        """
        rec = self.function_record
        for kind in ("sse", "avx2", "avx512"):
            hits = rec.counts[kind]
            if hits < 1:
                continue
            self.functions[kind][rec.name] = rec.scores[kind]
            self.ratios[kind][rec.name] = 100.0 * hits / rec.instructions
            self.total_scores[kind] += rec.scores[kind]
            self.total_counts[kind] += hits
|
|
|
|
|
|
def is_sse(instruction:str, args:str) -> float:
    """Score one instruction as SSE.

    Returns -1.0 when the operands do not mention an %xmm register, 1.0 when
    the mnemonic contains "pd" or "ps" or is listed in sse_instructions_xmm,
    and 0.01 for any other instruction that uses %xmm.
    """
    if "%xmm" not in args:
        return -1.0

    if "pd" in instruction or "ps" in instruction or instruction in sse_instructions_xmm:
        return 1.0
    return 0.01
|
|
|
|
|
|
def is_avx2(instruction:str, args:str) -> float:
    """Score one instruction as AVX2.

    The result is the largest applicable value among:
      * 1.0 for a %ymm instruction whose mnemonic contains "pd"/"ps" or is in
        avx2_instructions_ymm, unless it also contains "xor" or "vmov";
        0.01 for every other %ymm instruction;
      * 0.1 / 1.0 / 2.0 when the mnemonic is listed in avx2_instructions_lv /
        avx2_instructions / avx2_instructions_hv.
    -1.0 is returned when nothing applies.
    """
    candidates = [-1.0]

    if "%ymm" in args:
        arithmetic = ("pd" in instruction or "ps" in instruction
                      or instruction in avx2_instructions_ymm)
        excluded = "xor" in instruction or "vmov" in instruction
        candidates.append(1.0 if arithmetic and not excluded else 0.01)

    weighted_tables = (
        (avx2_instructions_lv, 0.1),
        (avx2_instructions, 1.0),
        (avx2_instructions_hv, 2.0),
    )
    for table, weight in weighted_tables:
        if instruction in table:
            candidates.append(weight)

    return max(candidates)
|
|
|
|
def has_high_register(args: str) -> bool:
    """Return True when the operand string ends in a vector register 16-31.

    Only the tail of the string is inspected, so just the last operand is
    considered.
    """
    high_suffixes = tuple("mm%d" % number for number in range(16, 32))
    return args.endswith(high_suffixes)
|
|
|
|
def is_avx512(instruction:str, args:str) -> float:
    """Score one instruction as AVX512.

    The result is the largest applicable value among:
      * 0.1 / 1.0 / 2.0 for mnemonics listed in avx512_instructions_lv /
        avx512_instructions / avx512_instructions_hv;
      * for non-"xor" instructions whose operands end in a register numbered
        16-31: 0.2 if a %ymm register is involved, otherwise 0.1;
      * 0.1 when a "{%k" mask operand is present;
      * 1.0 when a "{1to" embedded broadcast is present (it saves a separate
        broadcast instruction);
      * for %zmm operands: 1.0 when the mnemonic contains "pd", "ps", "vpadd"
        or "vpsub" or is in avx2_instructions_ymm and contains neither "xor"
        nor "vmov"; 0.01 otherwise.
    -1.0 is returned when nothing applies.
    """
    candidates = [-1.0]

    weighted_tables = (
        (avx512_instructions_lv, 0.1),
        (avx512_instructions, 1.0),
        (avx512_instructions_hv, 2.0),
    )
    for table, weight in weighted_tables:
        if instruction in table:
            candidates.append(weight)

    not_xor = "xor" not in instruction
    if not_xor and has_high_register(args):
        candidates.append(0.2 if "%ymm" in args else 0.1)

    if "{%k" in args:  # predicate instructions
        candidates.append(0.1)
    if "{1to" in args:  # broadcast folded into the instruction
        candidates.append(1.0)

    if "%zmm" in args:
        arithmetic = (
            any(tag in instruction for tag in ("pd", "ps", "vpadd", "vpsub"))
            or instruction in avx2_instructions_ymm
        )
        excluded = (not not_xor) or "vmov" in instruction
        candidates.append(1.0 if arithmetic and not excluded else 0.01)

    return max(candidates)
|
|
|
|
|
|
def ratio(f: float) -> str:
    """Return *f* rounded to two decimal places, rendered as a string."""
    hundredths = round(f * 100)
    return str(hundredths / 100.0)
|
|
|
|
def print_top_functions(records:RecordKeeper) -> None:
    """Print the highest-ranked functions for SSE, AVX2 and AVX512.

    For each instruction set two tables are printed: the functions with the
    largest share of scored instructions (a percentage, taken from
    records.ratios) and the functions with the largest score (taken from
    records.functions).  Each table lists at most five functions.
    """
    def summarize(table: dict, is_pct: bool, max_funcs: int = 5) -> None:
        # Highest value first; ties keep the table's insertion order.
        for name in sorted(table, key=table.get, reverse=True)[:max_funcs]:
            row = "    %-30s\t%s" % (name, ratio(table[name]))
            if is_pct:
                # The original passed the literal "%s" here, which printed
                # "%s" after the number instead of a percent sign.
                print(row, "%")
            else:
                print(row)

    sets = (
        ("SSE", records.functions["sse"], records.ratios["sse"]),
        ("AVX2", records.functions["avx2"], records.ratios["avx2"]),
        ("AVX512", records.functions["avx512"], records.ratios["avx512"]),
    )

    for set_name, funcs, funcs_ratio in sets:
        print("Top %s functions by instruction count" % set_name)
        summarize(funcs_ratio, True)
        print()

        print("Top %s functions by value" % set_name)
        summarize(funcs, False)
        print()
|
|
|
|
# Debug-only counters: number of instructions that were scored for two
# instruction sets at once (incremented by process_objdump_line when the
# debug flag is set).
sse_avx2_duplicate_cnt = 0
avx2_avx512_duplicate_cnt = 0
|
|
|
|
def print_function_summary(records):
    """Print a one-line summary of the function currently being recorded.

    The line holds the function name, then for SSE, AVX2 and AVX512 the
    fraction of the function's instructions that were scored, then the three
    accumulated scores; fields are separated by tab arguments to print().
    The record must contain at least one instruction (the fractions divide by
    the instruction count).
    """
    current = records.function_record
    kinds = ("sse", "avx2", "avx512")

    fields = [current.name]
    for kind in kinds:
        fields.extend(("\t", ratio(current.counts[kind] / current.instructions)))
    for kind in kinds:
        fields.extend(("\t", current.scores[kind]))

    print(*fields)
|
|
|
|
# Patterns for the objdump lines this parser understands.  They are raw
# strings so that the regex escapes are not (mis)read as string escapes.
_COMMENT_RE = re.compile(r"^(.*)#.*")
_INSTRUCTION_RE = re.compile(r".*[0-9a-f]+:\t[0-9a-f ]+\t([a-zA-Z0-9]+) (.*)")
_FUNCTION_HEADER_RE = re.compile(r"^[0-9a-f]+ <(.+)>:$")


def process_objdump_line(records: "RecordKeeper", line: str, verbose: int, quiet: int) -> None:
    """Consume one line of ``objdump -d`` output and update *records*.

    * An empty line ends the current function: its record is folded into the
      file totals (when it holds at least one instruction) and a fresh
      FunctionRecord is started.
    * A function header (``<address> <name>:``) sets the current function name.
      It is matched against the unmodified line, before comment stripping, so
      symbol names that contain ``#`` are recorded intact.
    * An instruction line is scored with is_avx512(), then is_avx2() if that
      gave no positive score, then is_sse() if neither did, and the scores and
      counts of the current function are updated.

    Args:
        records: the RecordKeeper for the file being analysed.
        line: one output line, with or without its trailing newline.
        verbose: when > 0, echo each line with its scores and print a summary
            at the end of every function.
        quiet: when non-zero, exit the process with status 0 as soon as
            records.should_delete() is false.

    Raises:
        SystemExit: in quiet mode, once the file is known to be kept.
    """
    global sse_avx2_duplicate_cnt
    global avx2_avx512_duplicate_cnt

    sse_score = -1.0
    avx2_score = -1.0
    avx512_score = -1.0
    sse_str = " "
    avx2_str = " "
    avx512_str = ""
    ins = ""
    arg = ""

    # Blank line (bare or newline-terminated): end of the current function.
    if line in ("", "\n"):
        current = records.function_record
        if current.instructions > 0 and verbose > 0:
            print()
            print_function_summary(records)
        if verbose > 0:
            print()
        if current.instructions > 0:
            records.finalize_function_attrs()
        records.function_record = FunctionRecord()
        return

    header = _FUNCTION_HEADER_RE.search(line)
    if header:
        records.function_record.name = header.group(1)
    else:
        # Drop the trailing "# ..." annotation objdump appends to some lines.
        commented = _COMMENT_RE.search(line)
        if commented:
            line = commented.group(1)

        parsed = _INSTRUCTION_RE.search(line)
        if parsed:
            ins, arg = parsed.groups()

            avx512_score = is_avx512(ins, arg)
            if avx512_score <= 0:
                avx2_score = is_avx2(ins, arg)
            if avx2_score <= 0 and avx512_score <= 0:
                sse_score = is_sse(ins, arg)

            records.function_record.instructions += 1

    current = records.function_record

    if sse_score >= 0.0:
        sse_str = str(sse_score)
        current.scores["sse"] += sse_score
        current.counts["sse"] += 1

    if avx2_score >= 0.0:
        avx2_str = str(avx2_score)
        current.scores["avx2"] += avx2_score
        current.counts["avx2"] += 1

    if avx512_score >= 0.0:
        avx512_str = str(avx512_score)
        current.scores["avx512"] += avx512_score
        current.counts["avx512"] += 1

    if sse_score >= 0.0 and avx2_score >= 0.0 and debug:
        sse_avx2_duplicate_cnt += 1
        print("duplicate count for sse & avx2 ?", ins, arg, sse_avx2_duplicate_cnt)

    if avx512_score >= 0.0 and avx2_score >= 0.0 and debug:
        avx2_avx512_duplicate_cnt += 1
        print("duplicate count for avx2 & avx512 ?", ins, arg, avx2_avx512_duplicate_cnt)

    # In quiet mode nothing more needs to be learnt once the file is a keeper.
    if not records.should_delete() and quiet != 0:
        sys.exit(0)

    if verbose > 0:
        print(sse_str, "\t", avx2_str, "\t", avx512_str, "\t", line)
|
|
|
|
|
|
def do_file(filename: str, verbose:int, quiet:int, delete_type:str) -> None:
    """Disassemble *filename* with objdump, score it, report and maybe unlink it.

    Args:
        filename: path of the binary or library to analyse.
        verbose: passed through to process_objdump_line().
        quiet: when 0, print the analysis banner and the per-file report;
            otherwise process_objdump_line() may end the process early.
        delete_type: "sse", "avx2" or "avx512" to unlink the file when its
            totals for that instruction set are under the thresholds, or ""
            to never unlink.
    """
    records = RecordKeeper(delete_type)

    if quiet == 0:
        print("Analyzing", filename)

    # The context manager closes the pipe and reaps objdump even when
    # process_objdump_line() leaves via sys.exit().
    with subprocess.Popen(["objdump", "-d", filename], stdout=subprocess.PIPE) as proc:
        for raw_line in proc.stdout:
            process_objdump_line(records, raw_line.decode("latin-1"), verbose, quiet)

    # Functions are only folded into the totals when a blank line is seen, and
    # the output is not guaranteed to end with one; feed a final blank line so
    # the last function is counted too (a no-op if it already was).
    process_objdump_line(records, "", verbose, quiet)

    if quiet <= 0:
        print_top_functions(records)
        print()
        print("File total (SSE): ", records.total_counts["sse"],"instructions with score", round(records.total_scores["sse"]))
        print("File total (AVX2): ", records.total_counts["avx2"],"instructions with score", round(records.total_scores["avx2"]))
        print("File total (AVX512): ", records.total_counts["avx512"],"instructions with score", round(records.total_scores["avx512"]))
        print()
        if debug:
            print("File duplicate count of sse&avx2", sse_avx2_duplicate_cnt, ", duplicate count of avx2&avx512", avx2_avx512_duplicate_cnt)

    if records.should_delete():
        print(filename, "\t", delete_type, "count:", records.total_counts[delete_type],"\t", delete_type, "value:", ratio(records.total_scores[delete_type]))
        try:
            os.unlink(filename)
        except OSError as err:
            # Removal stays best-effort, but only filesystem errors are
            # tolerated and they are reported instead of silently dropped.
            print("Could not remove %s: %s" % (filename, err), file=sys.stderr)
|
|
|
|
|
|
def main():
    """Parse the command line and analyse the requested file.

    --quiet overrides --verbose.  At most one of -1/-2/-5 may be given; it
    selects the instruction set whose totals decide whether the file is
    unlinked.
    """
    global debug

    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--verbose", help="increase output verbosity", action="store_true")
    parser.add_argument("-q", "--quiet", help="decrease output verbosity", action="store_true")
    parser.add_argument("-d", "--debug", help="print out more debug info", action="store_true")
    parser.add_argument("filename", help = "The filename to inspect")
    unlink_group = parser.add_mutually_exclusive_group()
    unlink_group.add_argument("-1", "--unlinksse", help="unlink the file if it has no SSE instructions", action="store_true")
    unlink_group.add_argument("-2", "--unlinkavx2", help="unlink the file if it has no AVX2 instructions", action="store_true")
    unlink_group.add_argument("-5", "--unlinkavx512", help="unlink the file if it has no AVX512 instructions", action="store_true")

    options = parser.parse_args()

    quiet = 1 if options.quiet else 0
    verbose = 1 if options.verbose and not options.quiet else 0

    if options.debug:
        debug = 1

    # First selected unlink flag wins; none selected means "never unlink".
    deltype = ""
    unlink_choices = (
        (options.unlinksse, "sse"),
        (options.unlinkavx2, "avx2"),
        (options.unlinkavx512, "avx512"),
    )
    for selected, kind in unlink_choices:
        if selected:
            deltype = kind
            break

    do_file(options.filename, verbose, quiet, deltype)
|
|
|
|
|
|
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
|