# Two questions:
#   1. Which ZINC IDs have the most catalog IDs (most redundancy of supply)?
#   2. Which supplier codes have the most ZINC IDs (most ambiguous,
#      enumerated into multiple forms)?
#
# csh/tcsh syntax (foreach ... end). For every H??/H??[PM]???.smi.gz file,
# two reports are written next to the input:
#   <file>.a  - values of column 2 that occur more than once, with counts
#   <file>.b  - values of column 3 that occur more than once, with counts
# Each report line is "<count> <value>", highest count first.
# NOTE(review): column 2 is presumably the ZINC ID and column 3 the supplier
# code -- confirm against the .smi column layout.

foreach i (H??)
    foreach j ($i/H??[PM]???.smi.gz)
        # Singletons are dropped before the final sort so it only has to
        # order the repeated values. The count column is sorted numerically
        # (-k1,1nr): a plain textual sort of the padded "uniq -c" counts
        # misorders them (e.g. 9 above 10) in locales that ignore leading
        # blanks. The second key makes the order of ties deterministic.
        zcat $j | awk '{print $2}' | sort | uniq -c | awk '$1 > 1' | sort -k1,1nr -k2,2 > $j.a
        zcat $j | awk '{print $3}' | sort | uniq -c | awk '$1 > 1' | sort -k1,1nr -k2,2 > $j.b

        # Progress indicator: name of the file just processed.
        echo $j
    end
end