We present an extensive study to build classification and regression models using five different ADMET data sets (HIA, LogP, LogS, BBB, and two toxicological data sets causing cancer in rats and mice). We compare especially the relevance of vector based coding for molecules using descriptors and fingerprints and a coordinate-free coding working directly on the molecular structures avoiding a temporary abstract vector representation. We see that the vector coding can be used for large data sets by losing accuracy and the coordinate-free approach avoids the feature selection problem, but is only applicable for smaller data sets. Furthermore we discuss shortly the underlying space and time complexities.
% Meeting abstract by Wegner and Zell (2005) comparing vector based and graph
% based molecule coding for ADME prediction.
% - The given name is written with the braced accent form {\"o} so that classic
%   BibTeX treats it as a single letter when sorting and building labels.
% - NOTE(review): "address" presumably records the meeting location rather than
%   a publisher city; the standard BibTeX styles do not print it for article
%   entries. Confirm before moving it to another field.
@article{2005_6,
  author   = {Wegner, J{\"o}rg K. and Zell, Andreas},
  title    = {Difference in vector based and graph based coding for {ADME} prediction},
  journal  = {Abstracts of Papers American Chemical Society (229th National Meeting of the American-Chemical-Society)},
  year     = {2005},
  volume   = {229},
  number   = {Part 1},
  pages    = {U608},
  month    = mar,
  address  = {San Diego, CA, USA},
  abstract = {We present an extensive study to build classification and regression models using five different ADMET data sets (HIA, LogP, LogS, BBB, and two toxicological data sets causing cancer in rats and mice). We compare especially the relevance of vector based coding for molecules using descriptors and fingerprints and a coordinate-free coding working directly on the molecular structures avoiding a temporary abstract vector representation. We see that the vector coding can be used for large data sets by losing accuracy and the coordinate-free approach avoids the feature selection problem, but is only applicable for smaller data sets. Furthermore we discuss shortly the underlying space and time complexities.},
}