Total Number of Bioinformatics repositories in Github:
Figure 2: Number of bioinformatics repositories created per year (source: GitHub).
Total Number of Bioinformatics Projects by Programming Language:
Figure 3: Number of GitHub repositories created per year, by language (source: GitHub).
Number of Forks by Language:
Figure 4: Distribution of the number of forks of bioinformatics repositories, by language (source: GitHub).
Some of the most popular repositories by Forks:
Repo
|
Language
|
Forks
|
Watchers
|
Created
|
bigdatagenomics/adam
|
Scala
|
154
|
399
|
2013
|
chapmanb/bcbb
|
Python
|
146
|
298
|
2008
|
igvteam/igv
|
Java
|
81
|
167
|
2012
|
bionode/bionode
|
JavaScript
|
23
|
161
|
2014
|
tumblr/genesis
|
Ruby
|
15
|
133
|
2014
|
griffithlab/rnaseq_tutorial
|
R
|
69
|
116
|
2014
|
Number of Open Issues by Programming Language:
Figure 5: Distribution of the number of open issues of bioinformatics repositories, by language (source: GitHub).
Number of references of GitHub.com in PubMed Central:
Figure 6: Number of mentions of GitHub in PubMed Central articles, by publication year (source: PubMed Central).
Methods
All numbers were generated with the following R script (replace token with a GitHub API token):
library(devtools)
library(RCurl)
library(RJSONIO)
library(ggplot2)
library(ttutils)
ghSearchRepo <- function(user = NULL, language = "R", only.fullname = TRUE,
                         queryString = NULL, pageSizeNumber = 100) {
  # Query the GitHub repository search API and return the list of matching
  # repository records, following pagination. Returns NULL when there are no
  # hits. `only.fullname` is accepted for interface compatibility but is not
  # used by the visible body.
  if (!is.null(user)) {
    user <- paste0("+user:", user)
  }
  if (!is.null(language)) {
    language <- paste0("+language:", language)
  }
  # Bug fix: `query` was only assigned when queryString was non-NULL, so a
  # NULL queryString crashed with "object 'query' not found".
  query <- if (is.null(queryString)) "" else queryString
  pageSize <- paste0("&per_page=", pageSizeNumber)
  # paste0() drops NULL arguments, so a NULL user/language simply vanishes
  # from the URL.
  baseSearch <- paste0("https://api.github.com/search/repositories?q=",
                       query, user, language, pageSize)
  response <- getURL(paste0(baseSearch, "&access_token=token"),
                     ssl.verifypeer = FALSE, useragent = "RCurl")
  output <- fromJSON(response)
  if (output[["total_count"]] == 0) {
    return(NULL)
  }
  items <- output$items
  if (output[["total_count"]] > pageSizeNumber) {
    # Bug fixes: use ceiling() for the page count instead of adding 1 to a
    # fractional quotient (which over-counted a page for exact multiples),
    # and start the loop at page 2 -- the first request above already
    # returned page 1, which the old loop fetched and appended a second time.
    numPage <- ceiling(as.numeric(output[["total_count"]]) /
                         as.numeric(pageSizeNumber))
    if (numPage >= 2) {
      for (i in 2:numPage) {
        search <- paste0(baseSearch, "&page=", i, "&access_token=token")
        response <- getURL(search, ssl.verifypeer = FALSE,
                           useragent = "RCurl")
        currentItems <- fromJSON(response)
        if (!is.null(currentItems)) {
          items <- append(items, currentItems$items)
        }
      }
    }
  }
  items
}
addToDF <- function(df, repoList) {
  # Append one row per repository record in `repoList` to the accumulator
  # data frame `df` and return the extended frame. Missing fields become
  # "Unknown" (or 0 for the numeric counts); `created` keeps only the year
  # (first 4 characters of the ISO timestamp).
  #
  # Bug fix: ghSearchRepo() returns NULL when a search has no hits, and the
  # old `1:length(repoList)` then iterated over c(1, 0) and crashed.
  if (length(repoList) == 0) {
    return(df)
  }
  rows <- vector("list", length(repoList))  # preallocate: no rbind in loop
  for (i in seq_along(repoList)) {
    repo <- repoList[[i]]
    print(repo$full_name)  # progress indicator, kept from the original
    rows[[i]] <- data.frame(
      "repository" = if (is.null(repo$full_name)) "Unknown" else repo$full_name,
      "language" = if (is.null(repo$language)) "Unknown" else repo$language,
      "forks_count" = if (is.null(repo$forks_count)) 0 else repo$forks_count,
      "watchers_count" = if (is.null(repo$watchers_count)) 0 else repo$watchers_count,
      "created" = if (is.null(repo$created_at)) "Unknown" else substr(repo$created_at, 1, 4),
      "private" = if (is.null(repo$private)) "Unknown" else repo$private
    )
  }
  rbind(df, do.call(rbind, rows))
}
languageKeywords <- function(df, keyword) {
  # Search GitHub for `keyword` once per surveyed language and accumulate
  # every hit into `df`. Replaces ten copy-pasted search/append pairs with a
  # single loop over the language vector; the search order is unchanged.
  languages <- c("Java", "R", "Python", "C++", "Scala",
                 "JavaScript", "Perl", "Shell", "Ruby", "C")
  for (lang in languages) {
    repos <- ghSearchRepo(only.fullname = FALSE, queryString = keyword,
                          language = lang)
    df <- addToDF(df, repoList = repos)
  }
  df
}
# Empty accumulator frame with the columns addToDF() appends.
df <- data.frame("repository" = character(), "language" = character(), "forks_count" = numeric(),
"watchers_count" = numeric(), "created" = character(),"private"= character())
# Run one multi-language GitHub search per bioinformatics-related keyword
# and accumulate all hits into df.
df <- languageKeywords(df, "bioinformatics")
df <- languageKeywords(df, "genomics")
df <- languageKeywords(df, "proteomics")
df <- languageKeywords(df, "proteins")
df <- languageKeywords(df, "genes")
df <- languageKeywords(df, "spectrometry")
df <- languageKeywords(df, "biology")
# A repository can match several keywords: keep each row only once, and drop
# records whose full name was missing ("Unknown" placeholder from addToDF).
df <- unique(df)
df <- subset(df, repository != "Unknown")
Code for PubMed Central by Stephen Eglen:
library(RCurl)
library(RJSONIO)
library(ggplot2)
library(ttutils)
ghSearchRepo <- function(user = NULL, language = "R", only.fullname = TRUE,
                         queryString = NULL, pageSizeNumber = 100) {
  # Query the GitHub repository search API and return the list of matching
  # repository records, following pagination. Returns NULL when there are no
  # hits. `only.fullname` is accepted for interface compatibility but is not
  # used by the visible body.
  if (!is.null(user)) {
    user <- paste0("+user:", user)
  }
  if (!is.null(language)) {
    language <- paste0("+language:", language)
  }
  # Bug fix: `query` was only assigned when queryString was non-NULL, so a
  # NULL queryString crashed with "object 'query' not found".
  query <- if (is.null(queryString)) "" else queryString
  pageSize <- paste0("&per_page=", pageSizeNumber)
  # paste0() drops NULL arguments, so a NULL user/language simply vanishes
  # from the URL.
  baseSearch <- paste0("https://api.github.com/search/repositories?q=",
                       query, user, language, pageSize)
  response <- getURL(paste0(baseSearch, "&access_token=token"),
                     ssl.verifypeer = FALSE, useragent = "RCurl")
  output <- fromJSON(response)
  if (output[["total_count"]] == 0) {
    return(NULL)
  }
  items <- output$items
  if (output[["total_count"]] > pageSizeNumber) {
    # Bug fixes: use ceiling() for the page count instead of adding 1 to a
    # fractional quotient (which over-counted a page for exact multiples),
    # and start the loop at page 2 -- the first request above already
    # returned page 1, which the old loop fetched and appended a second time.
    numPage <- ceiling(as.numeric(output[["total_count"]]) /
                         as.numeric(pageSizeNumber))
    if (numPage >= 2) {
      for (i in 2:numPage) {
        search <- paste0(baseSearch, "&page=", i, "&access_token=token")
        response <- getURL(search, ssl.verifypeer = FALSE,
                           useragent = "RCurl")
        currentItems <- fromJSON(response)
        if (!is.null(currentItems)) {
          items <- append(items, currentItems$items)
        }
      }
    }
  }
  items
}
addToDF <- function(df, repoList) {
  # Append one row per repository record in `repoList` to the accumulator
  # data frame `df` and return the extended frame. Missing fields become
  # "Unknown" (or 0 for the numeric counts); `created` keeps only the year
  # (first 4 characters of the ISO timestamp).
  #
  # Bug fix: ghSearchRepo() returns NULL when a search has no hits, and the
  # old `1:length(repoList)` then iterated over c(1, 0) and crashed.
  if (length(repoList) == 0) {
    return(df)
  }
  rows <- vector("list", length(repoList))  # preallocate: no rbind in loop
  for (i in seq_along(repoList)) {
    repo <- repoList[[i]]
    print(repo$full_name)  # progress indicator, kept from the original
    rows[[i]] <- data.frame(
      "repository" = if (is.null(repo$full_name)) "Unknown" else repo$full_name,
      "language" = if (is.null(repo$language)) "Unknown" else repo$language,
      "forks_count" = if (is.null(repo$forks_count)) 0 else repo$forks_count,
      "watchers_count" = if (is.null(repo$watchers_count)) 0 else repo$watchers_count,
      "created" = if (is.null(repo$created_at)) "Unknown" else substr(repo$created_at, 1, 4),
      "private" = if (is.null(repo$private)) "Unknown" else repo$private
    )
  }
  rbind(df, do.call(rbind, rows))
}
languageKeywords <- function(df, keyword) {
  # Search GitHub for `keyword` once per surveyed language and accumulate
  # every hit into `df`. Replaces ten copy-pasted search/append pairs with a
  # single loop over the language vector; the search order is unchanged.
  languages <- c("Java", "R", "Python", "C++", "Scala",
                 "JavaScript", "Perl", "Shell", "Ruby", "C")
  for (lang in languages) {
    repos <- ghSearchRepo(only.fullname = FALSE, queryString = keyword,
                          language = lang)
    df <- addToDF(df, repoList = repos)
  }
  df
}
# Empty accumulator frame with the columns addToDF() appends.
df <- data.frame("repository" = character(), "language" = character(), "forks_count" = numeric(),
"watchers_count" = numeric(), "created" = character(),"private"= character())
# Run one multi-language GitHub search per bioinformatics-related keyword
# and accumulate all hits into df.
df <- languageKeywords(df, "bioinformatics")
df <- languageKeywords(df, "genomics")
df <- languageKeywords(df, "proteomics")
df <- languageKeywords(df, "proteins")
df <- languageKeywords(df, "genes")
df <- languageKeywords(df, "spectrometry")
df <- languageKeywords(df, "biology")
# A repository can match several keywords: keep each row only once, and drop
# records whose full name was missing ("Unknown" placeholder from addToDF).
df <- unique(df)
df <- subset(df, repository != "Unknown")
Code for PubMed Central by Stephen Eglen:
# Load devtools for install_github(), then install and attach the 'rebi'
# client for the Europe PMC REST API.
# NOTE(review): install_github() re-installs on every run, and require()
# only warns (returns FALSE) if a package is missing -- library() would
# fail loudly. Kept as-is since this is a one-off analysis script.
require(devtools)
install_github("njahn82/rebi")
require(rebi)
# Number of Europe PMC / PubMed Central hits mentioning github.com that were
# published in the given year. Echoes the query string for progress tracking.
counts <- function(year) {
  query <- sprintf("github.com AND PUB_YEAR:%d", year)
  print(query)
  result <- epmc_search(query = query)
  result$hit_count
}
# Count github.com mentions in PubMed Central for each publication year.
years = 2009:2015
hits = sapply(years, counts)
# Render the per-year scatter plot (Figure 6) to a PNG file.
png(file="github-usage.png")
plot(years, hits, log='', pch=19, bty='n',
xlab='year of publication',
ylim=c(0, 2000),
las=1, main='github.com references on Pubmed Central')
dev.off()
No comments:
Post a Comment