#
#
#
#
# Two questions this script helps answer:
#  1. Which ZINC IDs have the most catalog IDs (most redundancy of supply)?
#  2. Which supplier codes have the most ZINC IDs (most ambiguous, i.e. enumerated into multiple forms)?

# For every compressed file matching H??/H??[PM]???.smi.gz, tally how often
# each value occurs in column 2 and in column 3, keeping only values that
# occur more than once:
#   <file>.a  - "count value" lines for column 2
#   <file>.b  - "count value" lines for column 3
# Both outputs are ordered by descending count.
# NOTE(review): presumably column 2 is the ZINC ID and column 3 the supplier
# code (matching the two questions in the file header) - confirm against the
# actual .smi.gz column layout.
foreach i (H??)
	foreach j ($i/H??[PM]???.smi.gz)
		# Singletons are dropped before the final sort so it only handles the
		# repeated values. -k1,1nr compares the count numerically; a textual
		# sort of the space-padded counts from uniq -c depends on the locale
		# and on the padding width, and can rank 2 above 10.
		zcat "$j" | awk '{print $2}' | sort | uniq -c | awk '$1 > 1' | sort -k1,1nr > "${j}.a"
		zcat "$j" | awk '{print $3}' | sort | uniq -c | awk '$1 > 1' | sort -k1,1nr > "${j}.b"
		# Progress indicator: name of the file just processed.
		echo "$j"
	end
end