% PMID 29790989 -- Afgan et al. (2018), Nucleic Acids Res.: Galaxy platform, 2018 update.
% Volume/number/pages/month describe the print issue (46(W1), July 2018); confirm against PubMed.
@article{pmid29790989,
  author   = {Afgan, E. and Baker, D. and Batut, B. and van den Beek, M. and Bouvier, D. and {\v{C}}ech, M. and Chilton, J. and Clements, D. and Coraor, N. and Gr{\"u}ning, B. A. and Guerler, A. and Hillman-Jackson, J. and Hiltemann, S. and Jalili, V. and Rasche, H. and Soranzo, N. and Goecks, J. and Taylor, J. and Nekrutenko, A. and Blankenberg, D.},
  title    = {The {Galaxy} platform for accessible, reproducible and collaborative biomedical analyses: 2018 update},
  journal  = {Nucleic Acids Res.},
  year     = {2018},
  volume   = {46},
  number   = {W1},
  pages    = {W537--W544},
  month    = jul,
  note     = {[DOI:\href{https://dx.doi.org/10.1093/nar/gky379}{10.1093/nar/gky379}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/29790989}{29790989}]},
}
% PMID 29688462 -- Nekrutenko et al. (2018), Mol. Biol. Evol.: evolutionary software tools.
% The Galaxy Team is a corporate author, so it is double-braced to stop BibTeX parsing it as a person.
@article{pmid29688462,
  author   = {Nekrutenko, A. and {Galaxy Team} and Goecks, J. and Taylor, J. and Blankenberg, D.},
  title    = {Biology needs evolutionary software tools: {Let's} build them right},
  journal  = {Mol. Biol. Evol.},
  year     = {2018},
  month    = apr,
  note     = {[DOI:\href{https://dx.doi.org/10.1093/molbev/msy084}{10.1093/molbev/msy084}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/29688462}{29688462}]},
}
% PMID 27137889 -- Afgan et al. (2016), Nucleic Acids Res.: Galaxy platform, 2016 update.
% The exporter lost the first letter of the sixth author's surname ("?ech"); restored as a LaTeX accent.
@article{pmid27137889,
  author   = {Afgan, E. and Baker, D. and van den Beek, M. and Blankenberg, D. and Bouvier, D. and {\v{C}}ech, M. and Chilton, J. and Clements, D. and Coraor, N. and Eberhard, C. and Gr{\"u}ning, B. and Guerler, A. and Hillman-Jackson, J. and Von Kuster, G. and Rasche, E. and Soranzo, N. and Turaga, N. and Taylor, J. and Nekrutenko, A. and Goecks, J.},
  title    = {The {Galaxy} platform for accessible, reproducible and collaborative biomedical analyses: 2016 update},
  journal  = {Nucleic Acids Res.},
  year     = {2016},
  volume   = {44},
  number   = {W1},
  pages    = {W3--W10},
  month    = jul,
  note     = {[PubMed Central:\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4987906}{PMC4987906}] [DOI:\href{https://dx.doi.org/10.1093/nar/gkw343}{10.1093/nar/gkw343}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/27137889}{27137889}]},
}
% PMID 26780094 -- Qu et al. (2016), Nat. Methods: GenomeSpace tool interoperation.
@article{pmid26780094,
  author   = {Qu, K. and Garamszegi, S. and Wu, F. and Thorvaldsd{\'o}ttir, H. and Liefeld, T. and Ocana, M. and Borges-Rivera, D. and Pochet, N. and Robinson, J. T. and Demchak, B. and Hull, T. and Ben-Artzi, G. and Blankenberg, D. and Barber, G. P. and Lee, B. T. and Kuhn, R. M. and Nekrutenko, A. and Segal, E. and Ideker, T. and Reich, M. and Regev, A. and Chang, H. Y. and Mesirov, J. P.},
  title    = {Integrative genomic analysis by interoperation of bioinformatics tools in {GenomeSpace}},
  journal  = {Nat. Methods},
  year     = {2016},
  volume   = {13},
  number   = {3},
  pages    = {245--247},
  month    = mar,
  abstract = {Complex biomedical analyses require the use of multiple software tools in concert and remain challenging for much of the biomedical research community. We introduce GenomeSpace (http://www.genomespace.org), a cloud-based, cooperative community resource that currently supports the streamlined interaction of 20 bioinformatics tools and data resources. To facilitate integrative analysis by non-programmers, it offers a growing set of 'recipes', short workflows to guide investigators through high-utility analysis tasks.},
  note     = {[PubMed Central:\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4767623}{PMC4767623}] [DOI:\href{https://dx.doi.org/10.1038/nmeth.3732}{10.1038/nmeth.3732}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/26780094}{26780094}]},
}
% PMID 25655493 -- Blankenberg, Taylor, Nekrutenko (2015), Cold Spring Harb Protoc: online NGS resources.
@article{pmid25655493,
  author   = {Blankenberg, D. and Taylor, J. and Nekrutenko, A.},
  title    = {Online resources for genomic analysis using high-throughput sequencing},
  journal  = {Cold Spring Harb Protoc},
  year     = {2015},
  volume   = {2015},
  number   = {4},
  pages    = {324--335},
  month    = apr,
  abstract = {The availability of high-throughput sequencing has created enormous possibilities for scientific discovery. However, the massive amount of data being generated has resulted in a severe informatics bottleneck. A large number of tools exist for analyzing next-generation sequencing (NGS) data, yet often there remains a disconnect between these research tools and the ability of many researchers to use them. As a consequence, several online resources and communities have been developed to assist researchers with both the management and the analysis of sequencing data sets. Here we describe the use and applications of common file formats for coding and storing genomic data, consider several web-accessible open-source resources for the visualization and analysis of NGS data, and provide examples of typical analyses with links to further detailed exercises.},
  note     = {[DOI:\href{https://dx.doi.org/10.1101/pdb.top083667}{10.1101/pdb.top083667}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/25655493}{25655493}]},
}
% PMID 25313049 -- Rebolledo-Jaramillo et al. (2014), PNAS: mtDNA heteroplasmy transmission.
% Abstract symbols and exponents are written as LaTeX math so the file stays ASCII and typesets cleanly.
@article{pmid25313049,
  author   = {Rebolledo-Jaramillo, B. and Su, M. S. and Stoler, N. and McElhoe, J. A. and Dickins, B. and Blankenberg, D. and Korneliussen, T. S. and Chiaromonte, F. and Nielsen, R. and Holland, M. M. and Paul, I. M. and Nekrutenko, A. and Makova, K. D.},
  title    = {Maternal age effect and severe germ-line bottleneck in the inheritance of human mitochondrial {DNA}},
  journal  = {Proc. Natl. Acad. Sci. U.S.A.},
  year     = {2014},
  volume   = {111},
  number   = {43},
  pages    = {15474--15479},
  month    = oct,
  abstract = {The manifestation of mitochondrial DNA (mtDNA) diseases depends on the frequency of heteroplasmy (the presence of several alleles in an individual), yet its transmission across generations cannot be readily predicted owing to a lack of data on the size of the mtDNA bottleneck during oogenesis. For deleterious heteroplasmies, a severe bottleneck may abruptly transform a benign (low) frequency in a mother into a disease-causing (high) frequency in her child. Here we present a high-resolution study of heteroplasmy transmission conducted on blood and buccal mtDNA of 39 healthy mother-child pairs of European ancestry (a total of 156 samples, each sequenced at $\sim$20,000$\times$ per site). On average, each individual carried one heteroplasmy, and one in eight individuals carried a disease-associated heteroplasmy, with minor allele frequency $\geq$1\%. We observed frequent drastic heteroplasmy frequency shifts between generations and estimated the effective size of the germ-line mtDNA bottleneck at only $\sim$30--35 (interquartile range from 9 to 141). Accounting for heteroplasmies, we estimated the mtDNA germ-line mutation rate at $1.3 \times 10^{-8}$ (interquartile range from $4.2 \times 10^{-9}$ to $4.1 \times 10^{-8}$) mutations per site per year, an order of magnitude higher than for nuclear DNA. Notably, we found a positive association between the number of heteroplasmies in a child and maternal age at fertilization, likely attributable to oocyte aging. This study also took advantage of droplet digital PCR (ddPCR) to validate heteroplasmies and confirm a de novo mutation. Our results can be used to predict the transmission of disease-causing mtDNA variants and illuminate evolutionary dynamics of the mitochondrial genome.},
  note     = {[PubMed Central:\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4217420}{PMC4217420}] [DOI:\href{https://dx.doi.org/10.1073/pnas.1409328111}{10.1073/pnas.1409328111}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/25313049}{25313049}]},
}
% PMID 25001293 -- Blankenberg et al. (2014), Genome Biol.: Galaxy ToolShed.
@article{pmid25001293,
  author   = {Blankenberg, D. and Von Kuster, G. and Bouvier, E. and Baker, D. and Afgan, E. and Stoler, N. and Taylor, J. and Nekrutenko, A.},
  title    = {Dissemination of scientific software with {Galaxy} {ToolShed}},
  journal  = {Genome Biol.},
  year     = {2014},
  volume   = {15},
  number   = {2},
  pages    = {403},
  abstract = {The proliferation of web-based integrative analysis frameworks has enabled users to perform complex analyses directly through the web. Unfortunately, it also revoked the freedom to easily select the most appropriate tools. To address this, we have developed Galaxy ToolShed.},
  note     = {[PubMed Central:\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4038738}{PMC4038738}] [DOI:\href{https://dx.doi.org/10.1186/gb4161}{10.1186/gb4161}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/25001293}{25001293}]},
}
% PMID 24743989 -- Blankenberg and Hillman-Jackson (2014), Methods Mol. Biol.: NGS analysis with Galaxy.
% The DOI contains an underscore; it is escaped in the visible link text because a bare _ is an error in LaTeX text mode.
@article{pmid24743989,
  author   = {Blankenberg, D. and Hillman-Jackson, J.},
  title    = {Analysis of next-generation sequencing data using {Galaxy}},
  journal  = {Methods Mol. Biol.},
  year     = {2014},
  volume   = {1150},
  pages    = {21--43},
  abstract = {The extraordinary throughput of next-generation sequencing (NGS) technology is outpacing our ability to analyze and interpret the data. This chapter will focus on practical informatics methods, strategies, and software tools for transforming NGS data into usable information through the use of a web-based platform, Galaxy. The Galaxy interface is explored through several different types of example analyses. Instructions for running one's own Galaxy server on local hardware or on cloud computing resources are provided. Installing new tools into a personal Galaxy instance is also demonstrated.},
  note     = {[DOI:\href{https://dx.doi.org/10.1007/978-1-4939-0512-6_2}{10.1007/978-1-4939-0512-6\_2}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/24743989}{24743989}]},
}
% PMID 24641477 -- Dickins et al. (2014), BioTechniques: contamination control in re-sequencing studies.
@article{pmid24641477,
  author   = {Dickins, B. and Rebolledo-Jaramillo, B. and Su, M. S. and Paul, I. M. and Blankenberg, D. and Stoler, N. and Makova, K. D. and Nekrutenko, A.},
  title    = {Controlling for contamination in re-sequencing studies with a reproducible web-based phylogenetic approach},
  journal  = {BioTechniques},
  year     = {2014},
  volume   = {56},
  number   = {3},
  pages    = {134--136},
  abstract = {Polymorphism discovery is a routine application of next-generation sequencing technology where multiple samples are sent to a service provider for library preparation, subsequent sequencing, and bioinformatic analyses. The decreasing cost and advances in multiplexing approaches have made it possible to analyze hundreds of samples at a reasonable cost. However, because of the manual steps involved in the initial processing of samples and handling of sequencing equipment, cross-contamination remains a significant challenge. It is especially problematic in cases where polymorphism frequencies do not adhere to diploid expectation, for example, heterogeneous tumor samples, organellar genomes, as well as during bacterial and viral sequencing. In these instances, low levels of contamination may be readily mistaken for polymorphisms, leading to false results. Here we describe practical steps designed to reliably detect contamination and uncover its origin, and also provide new, Galaxy-based, readily accessible computational tools and workflows for quality control. All results described in this report can be reproduced interactively on the web as described at http://usegalaxy.org/contamination.},
  note     = {[PubMed Central:\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4377138}{PMC4377138}] [DOI:\href{https://dx.doi.org/10.2144/000114146}{10.2144/000114146}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/24641477}{24641477}]},
}
% PMID 24585771 -- Blankenberg et al. (2014), Bioinformatics: Galaxy Data Manager framework.
@article{pmid24585771,
  author   = {Blankenberg, D. and Johnson, J. E. and Taylor, J. and Nekrutenko, A.},
  title    = {Wrangling {Galaxy's} reference data},
  journal  = {Bioinformatics},
  year     = {2014},
  volume   = {30},
  number   = {13},
  pages    = {1917--1919},
  month    = jul,
  abstract = {The Galaxy platform has developed into a fully featured collaborative workbench, with goals of inherently capturing provenance to enable reproducible data analysis, and of making it straightforward to run one's own server. However, many Galaxy platform tools rely on the presence of reference data, such as alignment indexes, to function efficiently. Until now, the building of this cache of data for Galaxy has been an error-prone manual process lacking reproducibility and provenance. The Galaxy Data Manager framework is an enhancement that changes the management of Galaxy's built-in data cache from a manual procedure to an automated graphical user interface (GUI) driven process, which contains the same openness, reproducibility and provenance that is afforded to Galaxy's analysis tools. Data Manager tools allow the Galaxy administrator to download, create and install additional datasets for any type of reference data in real time.\\ The Galaxy Data Manager framework is implemented in Python and has been integrated as part of the core Galaxy platform. Individual Data Manager tools can be defined locally or installed from a ToolShed, allowing the Galaxy community to define additional Data Manager tools as needed, with full versioning and dependency support.},
  note     = {[PubMed Central:\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4071198}{PMC4071198}] [DOI:\href{https://dx.doi.org/10.1093/bioinformatics/btu119}{10.1093/bioinformatics/btu119}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/24585771}{24585771}]},
}
% PMID 23051646 -- Minevich et al. (2012), Genetics: CloudMap mutant genome analysis pipeline.
@article{pmid23051646,
  author   = {Minevich, G. and Park, D. S. and Blankenberg, D. and Poole, R. J. and Hobert, O.},
  title    = {{CloudMap}: a cloud-based pipeline for analysis of mutant genome sequences},
  journal  = {Genetics},
  year     = {2012},
  volume   = {192},
  number   = {4},
  pages    = {1249--1269},
  month    = dec,
  abstract = {Whole genome sequencing (WGS) allows researchers to pinpoint genetic differences between individuals and significantly shortcuts the costly and time-consuming part of forward genetic analysis in model organism systems. Currently, the most effort-intensive part of WGS is the bioinformatic analysis of the relatively short reads generated by second generation sequencing platforms. We describe here a novel, easily accessible and cloud-based pipeline, called CloudMap, which greatly simplifies the analysis of mutant genome sequences. Available on the Galaxy web platform, CloudMap requires no software installation when run on the cloud, but it can also be run locally or via Amazon's Elastic Compute Cloud (EC2) service. CloudMap uses a series of predefined workflows to pinpoint sequence variations in animal genomes, such as those of premutagenized and mutagenized Caenorhabditis elegans strains. In combination with a variant-based mapping procedure, CloudMap allows users to sharply define genetic map intervals graphically and to retrieve very short lists of candidate variants with a few simple clicks. Automated workflows and extensive video user guides are available to detail the individual analysis steps performed (http://usegalaxy.org/cloudmap). We demonstrate the utility of CloudMap for WGS analysis of C. elegans and Arabidopsis genomes and describe how other organisms (e.g., Zebrafish and Drosophila) can easily be accommodated by this software platform. To accommodate rapid analysis of many mutants from large-scale genetic screens, CloudMap contains an in silico complementation testing tool that allows users to rapidly identify instances where multiple alleles of the same gene are present in the mutant collection. Lastly, we describe the application of a novel mapping/WGS method ("Variant Discovery Mapping") that does not rely on a defined polymorphic mapping strain, and we integrate the application of this method into CloudMap. CloudMap tools and documentation are continually updated at http://usegalaxy.org/cloudmap.},
  note     = {[PubMed Central:\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3512137}{PMC3512137}] [DOI:\href{https://dx.doi.org/10.1534/genetics.112.144204}{10.1534/genetics.112.144204}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/23051646}{23051646}]},
}
% PMID 22889292 -- Stamatoyannopoulos et al. (2012), Genome Biol.: Mouse ENCODE.
@article{pmid22889292,
  author   = {Stamatoyannopoulos, J. A. and Snyder, M. and Hardison, R. and Ren, B. and Gingeras, T. and Gilbert, D. M. and Groudine, M. and Bender, M. and Kaul, R. and Canfield, T. and Giste, E. and Johnson, A. and Zhang, M. and Balasundaram, G. and Byron, R. and Roach, V. and Sabo, P. J. and Sandstrom, R. and Stehling, A. S. and Thurman, R. E. and Weissman, S. M. and Cayting, P. and Hariharan, M. and Lian, J. and Cheng, Y. and Landt, S. G. and Ma, Z. and Wold, B. J. and Dekker, J. and Crawford, G. E. and Keller, C. A. and Wu, W. and Morrissey, C. and Kumar, S. A. and Mishra, T. and Jain, D. and Byrska-Bishop, M. and Blankenberg, D. and Lajoie, B. R. and Jain, G. and Sanyal, A. and Chen, K. B. and Denas, O. and Taylor, J. and Blobel, G. A. and Weiss, M. J. and Pimkin, M. and Deng, W. and Marinov, G. K. and Williams, B. A. and Fisher-Aylor, K. I. and Desalvo, G. and Kiralusha, A. and Trout, D. and Amrhein, H. and Mortazavi, A. and Edsall, L. and McCleary, D. and Kuan, S. and Shen, Y. and Yue, F. and Ye, Z. and Davis, C. A. and Zaleski, C. and Jha, S. and Xue, C. and Dobin, A. and Lin, W. and Fastuca, M. and Wang, H. and Guig{\'o}, R. and Djebali, S. and Lagarde, J. and Ryba, T. and Sasaki, T. and Malladi, V. S. and Cline, M. S. and Kirkup, V. M. and Learned, K. and Rosenbloom, K. R. and Kent, W. J. and Feingold, E. A. and Good, P. J. and Pazin, M. and Lowdon, R. F. and Adams, L. B.},
  title    = {An encyclopedia of mouse {DNA} elements ({Mouse} {ENCODE})},
  journal  = {Genome Biol.},
  year     = {2012},
  volume   = {13},
  number   = {8},
  pages    = {418},
  abstract = {To complement the human Encyclopedia of DNA Elements (ENCODE) project and to enable a broad range of mouse genomics efforts, the Mouse ENCODE Consortium is applying the same experimental pipelines developed for human ENCODE to annotate the mouse genome.},
  note     = {[PubMed Central:\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3491367}{PMC3491367}] [DOI:\href{https://dx.doi.org/10.1186/gb-2012-13-8-418}{10.1186/gb-2012-13-8-418}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/22889292}{22889292}]},
}
% PMID 22700312 -- Hillman-Jackson et al. (2012), Curr Protoc Bioinformatics, Unit 10.5.
% The unit label is spelled "Unit 10.5", matching the 2007 edition of this protocol (pmid18428782).
@article{pmid22700312,
  author   = {Hillman-Jackson, J. and Clements, D. and Blankenberg, D. and Taylor, J. and Nekrutenko, A.},
  title    = {Using {Galaxy} to perform large-scale interactive data analyses},
  journal  = {Curr Protoc Bioinformatics},
  year     = {2012},
  volume   = {Chapter 10},
  pages    = {Unit 10.5},
  month    = jun,
  abstract = {Innovations in biomedical research technologies continue to provide experimental biologists with novel and increasingly large genomic and high-throughput data resources to be analyzed. As creating and obtaining data has become easier, the key decision faced by many researchers is a practical one: where and how should an analysis be performed? Datasets are large and analysis tool set-up and use is riddled with complexities outside of the scope of core research activities. The authors believe that Galaxy provides a powerful solution that simplifies data acquisition and analysis in an intuitive Web application, granting all researchers access to key informatics tools previously only available to computational specialists working in Unix-based environments. We will demonstrate through a series of biomedically relevant protocols how Galaxy specifically brings together (1) data retrieval from public and private sources, for example, UCSC's Eukaryote and Microbial Genome Browsers, (2) custom tools (wrapped Unix functions, format standardization/conversions, interval operations), and 3rd-party analysis tools.},
  note     = {[PubMed Central:\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4282168}{PMC4282168}] [DOI:\href{https://dx.doi.org/10.1002/0471250953.bi1005s38}{10.1002/0471250953.bi1005s38}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/22700312}{22700312}]},
}
% PMID 21775304 -- Blankenberg et al. (2011), Bioinformatics: tools for whole genome multiple alignments.
% The export listed the first author twice; the second occurrence has been removed.
% NOTE(review): names after the third author may be group collaborators rather than authors -- confirm against PubMed.
@article{pmid21775304,
  author   = {Blankenberg, D. and Taylor, J. and Nekrutenko, A. and Afgan, E. and Baker, D. and Coraor, N. and Goecks, J. and Von Kuster, G. and Lazarus, R. and Li, K. and Vincent, K.},
  title    = {Making whole genome multiple alignments usable for biologists},
  journal  = {Bioinformatics},
  year     = {2011},
  volume   = {27},
  number   = {17},
  pages    = {2426--2428},
  month    = sep,
  abstract = {Here we describe a set of tools implemented within the Galaxy platform designed to make analysis of multiple genome alignments truly accessible for biologists. These tools are available through both a web-based graphical user interface and a command-line interface.\\ This open-source toolset was implemented in Python and has been integrated into the online data analysis platform Galaxy (public web access: http://usegalaxy.org; download: http://getgalaxy.org). Additional help is available as a live supplement from http://usegalaxy.org/u/dan/p/maf.\\ james.taylor@emory.edu; anton@bx.psu.edu\\ Supplementary data are available at Bioinformatics online.},
  note     = {[PubMed Central:\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3157923}{PMC3157923}] [DOI:\href{https://dx.doi.org/10.1093/bioinformatics/btr398}{10.1093/bioinformatics/btr398}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/21775304}{21775304}]},
}
% PMID 21531983 -- Blankenberg et al. (2011), Database (Oxford): integrating databases via Galaxy.
@article{pmid21531983,
  author   = {Blankenberg, D. and Coraor, N. and Von Kuster, G. and Taylor, J. and Nekrutenko, A.},
  title    = {Integrating diverse databases into an unified analysis framework: a {Galaxy} approach},
  journal  = {Database (Oxford)},
  year     = {2011},
  volume   = {2011},
  pages    = {bar011},
  abstract = {Recent technological advances have lead to the ability to generate large amounts of data for model and non-model organisms. Whereas, in the past, there have been a relatively small number of central repositories that serve genomic data, an increasing number of distinct specialized data repositories and resources have been established. Here, we describe a generic approach that provides for the integration of a diverse spectrum of data resources into a unified analysis framework, Galaxy (http://usegalaxy.org). This approach allows the simplified coupling of external data resources with the data analysis tools available to Galaxy users, while leveraging the native data mining facilities of the external data resources. DATABASE URL: http://usegalaxy.org.},
  note     = {[PubMed Central:\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3092608}{PMC3092608}] [DOI:\href{https://dx.doi.org/10.1093/database/bar011}{10.1093/database/bar011}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/21531983}{21531983}]},
}
% PMID 20562416 -- Blankenberg et al. (2010), Bioinformatics: FASTQ manipulation tool suite.
@article{pmid20562416,
  author   = {Blankenberg, D. and Gordon, A. and Von Kuster, G. and Coraor, N. and Taylor, J. and Nekrutenko, A.},
  title    = {Manipulation of {FASTQ} data with {Galaxy}},
  journal  = {Bioinformatics},
  year     = {2010},
  volume   = {26},
  number   = {14},
  pages    = {1783--1785},
  month    = jul,
  abstract = {Here, we describe a tool suite that functions on all of the commonly known FASTQ format variants and provides a pipeline for manipulating next generation sequencing data taken from a sequencing machine all the way through the quality filtering steps.\\ This open-source toolset was implemented in Python and has been integrated into the online data analysis platform Galaxy (public web access: http://usegalaxy.org; download: http://getgalaxy.org). Two short movies that highlight the functionality of tools described in this manuscript as well as results from testing components of this tool suite against a set of previously published files are available at http://usegalaxy.org/u/dan/p/fastq},
  note     = {[PubMed Central:\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2894519}{PMC2894519}] [DOI:\href{https://dx.doi.org/10.1093/bioinformatics/btq281}{10.1093/bioinformatics/btq281}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/20562416}{20562416}]},
}
% PMID 20069535 -- Blankenberg et al. (2010), Curr Protoc Mol Biol, Chapter 19: Galaxy for experimentalists.
@article{pmid20069535,
  author   = {Blankenberg, D. and Von Kuster, G. and Coraor, N. and Ananda, G. and Lazarus, R. and Mangan, M. and Nekrutenko, A. and Taylor, J.},
  title    = {{Galaxy}: a web-based genome analysis tool for experimentalists},
  journal  = {Curr Protoc Mol Biol},
  year     = {2010},
  volume   = {Chapter 19},
  pages    = {1--21},
  month    = jan,
  abstract = {High-throughput data production has revolutionized molecular biology. However, massive increases in data generation capacity require analysis approaches that are more sophisticated, and often very computationally intensive. Thus, making sense of high-throughput data requires informatics support. Galaxy (http://galaxyproject.org) is a software system that provides this support through a framework that gives experimentalists simple interfaces to powerful tools, while automatically managing the computational details. Galaxy is distributed both as a publicly available Web service, which provides tools for the analysis of genomic, comparative genomic, and functional genomic data, or a downloadable package that can be deployed in individual laboratories. Either way, it allows experimentalists without informatics or programming expertise to perform complex large-scale analysis with just a Web browser.},
  note     = {[PubMed Central:\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4264107}{PMC4264107}] [DOI:\href{https://dx.doi.org/10.1002/0471142727.mb1910s89}{10.1002/0471142727.mb1910s89}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/20069535}{20069535}]},
}
% PMID 18428782 -- Taylor et al. (2007), Curr Protoc Bioinformatics, Unit 10.5.
% The title's lowercase "galaxy" is kept exactly as exported.
@article{pmid18428782,
  author   = {Taylor, J. and Schenck, I. and Blankenberg, D. and Nekrutenko, A.},
  title    = {Using galaxy to perform large-scale interactive data analyses},
  journal  = {Curr Protoc Bioinformatics},
  year     = {2007},
  volume   = {Chapter 10},
  pages    = {Unit 10.5},
  month    = sep,
  abstract = {While most experimental biologists know where to download genomic data, few have a concrete plan on how to analyze it. This situation can be corrected by: (1) providing unified portals serving genomic data and (2) building Web applications to allow flexible retrieval and on-the-fly analyses of the data. Powerful resources, such as the UCSC Genome Browser already address the first issue. The second issue, however, remains open. For example, how to find human protein-coding exons with the highest density of single nucleotide polymorphisms (SNPs) and extract orthologous sequences from all sequenced mammals? Indeed, one can access all relevant data from the UCSC Genome Browser. But once the data is downloaded how would one deal with millions of SNPs and gigabytes of alignments? Galaxy (http://g2.bx.psu.edu) is designed specifically for that purpose. It amplifies the strengths of existing resources (such as UCSC Genome Browser) by allowing the user to access and, most importantly, analyze data within a single interface in an unprecedented number of ways.},
  note     = {[PubMed Central:\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3418382}{PMC3418382}] [DOI:\href{https://dx.doi.org/10.1002/0471250953.bi1005s19}{10.1002/0471250953.bi1005s19}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/18428782}{18428782}]},
}
% PMID 17984227 -- Miller et al. (2007), Genome Res.: 28-way vertebrate alignment.
@article{pmid17984227,
  author   = {Miller, W. and Rosenbloom, K. and Hardison, R. C. and Hou, M. and Taylor, J. and Raney, B. and Burhans, R. and King, D. C. and Baertsch, R. and Blankenberg, D. and Kosakovsky Pond, S. L. and Nekrutenko, A. and Giardine, B. and Harris, R. S. and Tyekucheva, S. and Diekhans, M. and Pringle, T. H. and Murphy, W. J. and Lesk, A. and Weinstock, G. M. and Lindblad-Toh, K. and Gibbs, R. A. and Lander, E. S. and Siepel, A. and Haussler, D. and Kent, W. J.},
  title    = {28-way vertebrate alignment and conservation track in the {UCSC} {Genome} {Browser}},
  journal  = {Genome Res.},
  year     = {2007},
  volume   = {17},
  number   = {12},
  pages    = {1797--1808},
  month    = dec,
  abstract = {This article describes a set of alignments of 28 vertebrate genome sequences that is provided by the UCSC Genome Browser. The alignments can be viewed on the Human Genome Browser (March 2006 assembly) at http://genome.ucsc.edu, downloaded in bulk by anonymous FTP from http://hgdownload.cse.ucsc.edu/goldenPath/hg18/multiz28way, or analyzed with the Galaxy server at http://g2.bx.psu.edu. This article illustrates the power of this resource for exploring vertebrate and mammalian evolution, using three examples. First, we present several vignettes involving insertions and deletions within protein-coding regions, including a look at some human-specific indels. Then we study the extent to which start codons and stop codons in the human sequence are conserved in other species, showing that start codons are in general more poorly conserved than stop codons. Finally, an investigation of the phylogenetic depth of conservation for several classes of functional elements in the human genome reveals striking differences in the rates and modes of decay in alignability. Each functional class has a distinctive period of stringent constraint, followed by decays that allow (for the case of regulatory regions) or reject (for coding regions and ultraconserved elements) insertions and deletions.},
  note     = {[PubMed Central:\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2099589}{PMC2099589}] [DOI:\href{https://dx.doi.org/10.1101/gr.6761107}{10.1101/gr.6761107}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/17984227}{17984227}]},
}
% PMID 17686158 -- Sangar et al. (2007), BMC Bioinformatics: sequence-function relationships.
% Percent signs in the abstract are escaped; a bare one would start a TeX comment if the abstract is ever typeset.
@article{pmid17686158,
  author   = {Sangar, V. and Blankenberg, D. J. and Altman, N. and Lesk, A. M.},
  title    = {Quantitative sequence-function relationships in proteins based on gene ontology},
  journal  = {BMC Bioinformatics},
  year     = {2007},
  volume   = {8},
  pages    = {294},
  abstract = {The relationship between divergence of amino-acid sequence and divergence of function among homologous proteins is complex. The assumption that homologs share function--the basis of transfer of annotations in databases--must therefore be regarded with caution. Here, we present a quantitative study of sequence and function divergence, based on the Gene Ontology classification of function. We determined the relationship between sequence divergence and function divergence in 6828 protein families from the PFAM database. Within families there is a broad range of sequence similarity from very closely related proteins--for instance, orthologs in different mammals--to very distantly-related proteins at the limit of reliable recognition of homology.\\ We correlated the divergence in sequences determined from pairwise alignments, and the divergence in function determined by path lengths in the Gene Ontology graph, taking into account the fact that many proteins have multiple functions. Our results show that, among homologous proteins, the proportion of divergent functions decreases dramatically above a threshold of sequence similarity at about 50\% residue identity. For proteins with more than 50\% residue identity, transfer of annotation between homologs will lead to an erroneous attribution with a totally dissimilar function in fewer than 6\% of cases. This means that for very similar proteins (about 50 \% identical residues) the chance of completely incorrect annotation is low; however, because of the phenomenon of recruitment, it is still non-zero.\\ Our results describe general features of the evolution of protein function, and serve as a guide to the reliability of annotation transfer, based on the closeness of the relationship between a new protein and its nearest annotated relative.},
  note     = {[PubMed Central:\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1976327}{PMC1976327}] [DOI:\href{https://dx.doi.org/10.1186/1471-2105-8-294}{10.1186/1471-2105-8-294}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/17686158}{17686158}]},
}
% PMID 17568012 -- Blankenberg et al. (2007), Genome Res.: collaborative analysis of ENCODE data.
@article{pmid17568012,
  author   = {Blankenberg, D. and Taylor, J. and Schenck, I. and He, J. and Zhang, Y. and Ghent, M. and Veeraraghavan, N. and Albert, I. and Miller, W. and Makova, K. D. and Hardison, R. C. and Nekrutenko, A.},
  title    = {A framework for collaborative analysis of {ENCODE} data: making large-scale analyses biologist-friendly},
  journal  = {Genome Res.},
  year     = {2007},
  volume   = {17},
  number   = {6},
  pages    = {960--964},
  month    = jun,
  abstract = {The standardization and sharing of data and tools are the biggest challenges of large collaborative projects such as the Encyclopedia of DNA Elements (ENCODE). Here we describe a compact Web application, Galaxy2(ENCODE), that effectively addresses these issues. It provides an intuitive interface for the deposition and access of data, and features a vast number of analysis tools including operations on genomic intervals, utilities for manipulation of multiple sequence alignments, and molecular evolution algorithms. By providing a direct link between data and analysis tools, Galaxy2(ENCODE) allows addressing biological questions that are beyond the reach of existing software. We use Galaxy2(ENCODE) to show that the ENCODE regions contain >2000 unannotated transcripts under strong purifying selection that are likely functional. We also show that the ENCODE regions are representative of the entire genome by estimating the rate of nucleotide substitution and comparing it to published data. Although each of these analyses is complex, none takes more than 15 min from beginning to end. Finally, we demonstrate how new tools can be added to Galaxy2(ENCODE) with almost no effort. Every section of the manuscript is supplemented with QuickTime screencasts. Galaxy2(ENCODE) and the screencasts can be accessed at http://g2.bx.psu.edu.},
  note     = {[PubMed Central:\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1891355}{PMC1891355}] [DOI:\href{https://dx.doi.org/10.1101/gr.5578007}{10.1101/gr.5578007}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/17568012}{17568012}]},
}
% PMID 16169926 -- Giardine et al. (2005), Genome Res.: Galaxy platform for interactive genome analysis.
@article{pmid16169926,
  author   = {Giardine, B. and Riemer, C. and Hardison, R. C. and Burhans, R. and Elnitski, L. and Shah, P. and Zhang, Y. and Blankenberg, D. and Albert, I. and Taylor, J. and Miller, W. and Kent, W. J. and Nekrutenko, A.},
  title    = {{Galaxy}: a platform for interactive large-scale genome analysis},
  journal  = {Genome Res.},
  year     = {2005},
  volume   = {15},
  number   = {10},
  pages    = {1451--1455},
  month    = oct,
  abstract = {Accessing and analyzing the exponentially expanding genomic sequence and functional data pose a challenge for biomedical researchers. Here we describe an interactive system, Galaxy, that combines the power of existing genome annotation databases with a simple Web portal to enable users to search remote resources, combine data from independent queries, and visualize the results. The heart of Galaxy is a flexible history system that stores the queries from each user; performs operations such as intersections, unions, and subtractions; and links to other computational tools. Galaxy can be accessed at http://g2.bx.psu.edu.},
  note     = {[PubMed Central:\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1240089}{PMC1240089}] [DOI:\href{https://dx.doi.org/10.1101/gr.4086505}{10.1101/gr.4086505}] [PubMed:\href{https://www.ncbi.nlm.nih.gov/pubmed/16169926}{16169926}]},
}