Reducing paper results by joining keywords

Alfonso R. Reyes

2018-02-18

This is a demonstration of how the number of papers returned by a search can be reduced by adding more keywords to the query.

library(petro.One)

Test #1

# Provide three sets of keywords, each as a character vector, to be combined.
major  <- c("water injection", "water flooding")
minor  <- c("machine-learning", "intelligent")
lesser <- c("neural network", "SVM", "genetic", "algorithm")

# Each line printed by this call is one query: one keyword from each vector,
# joined with AND, preceded by its paper count (2 x 2 x 4 = 16 combinations).
p.df <- join_keywords(major, minor, lesser, get_papers = TRUE)
#>   1    45 'water+injection'AND'machine-learning'AND'neural+network'    
#>   2    20 'water+flooding'AND'machine-learning'AND'neural+network'     
#>   3   106 'water+injection'AND'intelligent'AND'neural+network'         
#>   4    40 'water+flooding'AND'intelligent'AND'neural+network'          
#>   5     6 'water+injection'AND'machine-learning'AND'SVM'               
#>   6     6 'water+flooding'AND'machine-learning'AND'SVM'                
#>   7     3 'water+injection'AND'intelligent'AND'SVM'                    
#>   8     5 'water+flooding'AND'intelligent'AND'SVM'                     
#>   9    66 'water+injection'AND'machine-learning'AND'genetic'           
#>  10    30 'water+flooding'AND'machine-learning'AND'genetic'            
#>  11   162 'water+injection'AND'intelligent'AND'genetic'                
#>  12    84 'water+flooding'AND'intelligent'AND'genetic'                 
#>  13   109 'water+injection'AND'machine-learning'AND'algorithm'         
#>  14    53 'water+flooding'AND'machine-learning'AND'algorithm'          
#>  15   437 'water+injection'AND'intelligent'AND'algorithm'              
#>  16   216 'water+flooding'AND'intelligent'AND'algorithm'
# Print the result; the output shows a list with `$keywords` (one row per
# keyword combination) and `$papers` (the papers retrieved).
p.df
#> $keywords
#> # A tibble: 16 x 6
#>    Var1            Var2             Var3           paper_count sf    url  
#>    <chr>           <chr>            <chr>                <dbl> <chr> <chr>
#>  1 water injection machine-learning neural network       45.0  'wat~ "htt~
#>  2 water flooding  machine-learning neural network       20.0  'wat~ "htt~
#>  3 water injection intelligent      neural network      106    'wat~ "htt~
#>  4 water flooding  intelligent      neural network       40.0  'wat~ "htt~
#>  5 water injection machine-learning SVM                   6.00 'wat~ "htt~
#>  6 water flooding  machine-learning SVM                   6.00 'wat~ "htt~
#>  7 water injection intelligent      SVM                   3.00 'wat~ "htt~
#>  8 water flooding  intelligent      SVM                   5.00 'wat~ "htt~
#>  9 water injection machine-learning genetic              66.0  'wat~ "htt~
#> 10 water flooding  machine-learning genetic              30.0  'wat~ "htt~
#> 11 water injection intelligent      genetic             162    'wat~ "htt~
#> 12 water flooding  intelligent      genetic              84.0  'wat~ "htt~
#> 13 water injection machine-learning algorithm           109    'wat~ "htt~
#> 14 water flooding  machine-learning algorithm            53.0  'wat~ "htt~
#> 15 water injection intelligent      algorithm           437    'wat~ "htt~
#> 16 water flooding  intelligent      algorithm           216    'wat~ "htt~
#> 
#> $papers
#> # A tibble: 1,388 x 7
#>    title_data      paper_id  source  type    year author1_data    keyword 
#>    <chr>           <chr>     <chr>   <chr>  <int> <chr>           <chr>   
#>  1 Recovery Incre~ "       ~ "     ~ "    ~  2001 Brouwer, D.R.,~ 'water+~
#>  2 Ensemble-Based~ "       ~ "     ~ "    ~  2011 Pajonk, Oliver~ 'water+~
#>  3 Dynamic Optimi~ "       ~ "     ~ "    ~  2002 Brouwer, D.R.,~ 'water+~
#>  4 Developing a S~ "       ~ "     ~ "    ~  2017 Alenezi, Faisa~ 'water+~
#>  5 Proactive Opti~ "       ~ "     ~ "    ~  2016 Haghighat Sefa~ 'water+~
#>  6 Production Opt~ "       ~ "     ~ "    ~  2007 Emerick, Alexa~ 'water+~
#>  7 Effective well~ "       ~ "     ~ "    ~  2013 Jamal, Mariam ~ 'water+~
#>  8 Efficient Well~ "       ~ "     ~ "    ~  2008 Sarma, Pallav,~ 'water+~
#>  9 Application Of~ "       ~ "     ~ "    ~  2002 Zheng, Jian, S~ 'water+~
#> 10 An Adaptive Hi~ "       ~ "     ~ "    ~  2013 Oliveira, D.F.~ 'water+~
#> # ... with 1,378 more rows

Test #2

# Provide four sets of keywords, each as a character vector, to be combined.
m  <- c("water injection", "water flooding")
# NOTE(review): in the output of this call, "machine-learning" and
# "machine learning" return identical paper counts for every combination, so
# the two spellings may match the same papers -- confirm both are needed.
n  <- c("machine-learning", "machine learning", "intelligent")
p  <- c("neural network", "SVM", "genetic")
q  <- c("algorithm")

# One query per combination of one keyword from each vector, joined with AND
# (2 x 3 x 3 x 1 = 18 combinations).
p.df <- join_keywords(m, n, p, q, get_papers = TRUE)
#>   1    37 'water+injection'AND'machine-learning'AND'neural+network'AND'algorithm' 
#>   2    16 'water+flooding'AND'machine-learning'AND'neural+network'AND'algorithm' 
#>   3    37 'water+injection'AND'machine+learning'AND'neural+network'AND'algorithm' 
#>   4    16 'water+flooding'AND'machine+learning'AND'neural+network'AND'algorithm' 
#>   5    70 'water+injection'AND'intelligent'AND'neural+network'AND'algorithm' 
#>   6    30 'water+flooding'AND'intelligent'AND'neural+network'AND'algorithm' 
#>   7     5 'water+injection'AND'machine-learning'AND'SVM'AND'algorithm' 
#>   8     3 'water+flooding'AND'machine-learning'AND'SVM'AND'algorithm'  
#>   9     5 'water+injection'AND'machine+learning'AND'SVM'AND'algorithm' 
#>  10     3 'water+flooding'AND'machine+learning'AND'SVM'AND'algorithm'  
#>  11     2 'water+injection'AND'intelligent'AND'SVM'AND'algorithm'      
#>  12     1 'water+flooding'AND'intelligent'AND'SVM'AND'algorithm'       
#>  13    62 'water+injection'AND'machine-learning'AND'genetic'AND'algorithm' 
#>  14    24 'water+flooding'AND'machine-learning'AND'genetic'AND'algorithm' 
#>  15    62 'water+injection'AND'machine+learning'AND'genetic'AND'algorithm' 
#>  16    24 'water+flooding'AND'machine+learning'AND'genetic'AND'algorithm' 
#>  17   145 'water+injection'AND'intelligent'AND'genetic'AND'algorithm'  
#>  18    75 'water+flooding'AND'intelligent'AND'genetic'AND'algorithm'
# Print the result; the output shows a list with `$keywords` (one row per
# keyword combination) and `$papers` (the papers retrieved).
p.df
#> $keywords
#> # A tibble: 18 x 7
#>    Var1            Var2             Var3   Var4  paper_count sf    url    
#>    <chr>           <chr>            <chr>  <chr>       <dbl> <chr> <chr>  
#>  1 water injection machine-learning neura~ algo~       37.0  'wat~ "https~
#>  2 water flooding  machine-learning neura~ algo~       16.0  'wat~ "https~
#>  3 water injection machine learning neura~ algo~       37.0  'wat~ "https~
#>  4 water flooding  machine learning neura~ algo~       16.0  'wat~ "https~
#>  5 water injection intelligent      neura~ algo~       70.0  'wat~ "https~
#>  6 water flooding  intelligent      neura~ algo~       30.0  'wat~ "https~
#>  7 water injection machine-learning SVM    algo~        5.00 'wat~ "https~
#>  8 water flooding  machine-learning SVM    algo~        3.00 'wat~ "https~
#>  9 water injection machine learning SVM    algo~        5.00 'wat~ "https~
#> 10 water flooding  machine learning SVM    algo~        3.00 'wat~ "https~
#> 11 water injection intelligent      SVM    algo~        2.00 'wat~ "https~
#> 12 water flooding  intelligent      SVM    algo~        1.00 'wat~ "https~
#> 13 water injection machine-learning genet~ algo~       62.0  'wat~ "https~
#> 14 water flooding  machine-learning genet~ algo~       24.0  'wat~ "https~
#> 15 water injection machine learning genet~ algo~       62.0  'wat~ "https~
#> 16 water flooding  machine learning genet~ algo~       24.0  'wat~ "https~
#> 17 water injection intelligent      genet~ algo~      145    'wat~ "https~
#> 18 water flooding  intelligent      genet~ algo~       75.0  'wat~ "https~
#> 
#> $papers
#> # A tibble: 617 x 7
#>    title_data       paper_id  source  type   year author1_data   keyword  
#>    <chr>            <chr>     <chr>   <chr> <int> <chr>          <chr>    
#>  1 Application Of ~ "       ~ "     ~ "   ~  2002 Zheng, Jian, ~ 'water+f~
#>  2 Adopting Simple~ "       ~ "     ~ "   ~  2011 Al-Mudhafer, ~ 'water+f~
#>  3 Proactive Optim~ "       ~ "     ~ "   ~  2016 Haghighat Sef~ 'water+f~
#>  4 Application of ~ "       ~ "     ~ "   ~  2012 Al-Mudhafer, ~ 'water+f~
#>  5 Efficient Well ~ "       ~ "     ~ "   ~  2008 Sarma, Pallav~ 'water+f~
#>  6 Novel Applicati~ "       ~ "     ~ "   ~  2017 Prakasa, Bona~ 'water+f~
#>  7 An Optimization~ "       ~ "     ~ "   ~  2013 Yan, Xia, Uni~ 'water+f~
#>  8 Real-Time Optim~ "       ~ "     ~ "   ~  2015 Temizel, Cenk~ 'water+f~
#>  9 Comparisons Of ~ "       ~ "     ~ "   ~  2010 Samier, Pierr~ 'water+f~
#> 10 Optimizing Wate~ "       ~ "     ~ "   ~  2014 Prada Mejía, ~ 'water+f~
#> # ... with 607 more rows

Reducing test 5.2

# Provide five sets of keywords, each as a character vector, to be combined.
major   <- c("waterflooding")
minor   <- c("machine-learning", "artificial intelligence")
lesser  <- c("algorithm")
another <- c("data-mining")
more    <- c("data-driven")

# Only `minor` has more than one keyword, so just two queries are generated
# (1 x 2 x 1 x 1 x 1 = 2 combinations), each joining five terms with AND.
p.df <- join_keywords(major, minor, lesser, another, more, get_papers = TRUE)
#>   1    14 'waterflooding'AND'machine-learning'AND'algorithm'AND'data-mining'AND'data-driven' 
#>   2    10 'waterflooding'AND'artificial+intelligence'AND'algorithm'AND'data-mining'AND'data-driven'
# Print the result; the output shows a list with `$keywords` (one row per
# keyword combination) and `$papers` (the papers retrieved).
p.df
#> $keywords
#> # A tibble: 2 x 8
#>   Var1          Var2   Var3   Var4  Var5  paper_count sf       url        
#>   <chr>         <chr>  <chr>  <chr> <chr>       <dbl> <chr>    <chr>      
#> 1 waterflooding machi~ algor~ data~ data~        14.0 'waterf~ "https://w~
#> 2 waterflooding artif~ algor~ data~ data~        10.0 'waterf~ "https://w~
#> 
#> $papers
#> # A tibble: 24 x 7
#>    title_data       paper_id  source  type   year author1_data  keyword   
#>    <chr>            <chr>     <chr>   <chr> <int> <chr>         <chr>     
#>  1 Practical Appli~ "       ~ "     ~ "   ~  2015 Amirian, Ehs~ 'waterflo~
#>  2 Data-Driven Mod~ "       ~ "     ~ "   ~  2013 Dzurman, Pet~ 'waterflo~
#>  3 Turning Data in~ "       ~ "     ~ "   ~  2016 Temizel, Cen~ 'waterflo~
#>  4 Predicting Wate~ "       ~ "     ~ "   ~  2002 Fedenczuk, L~ 'waterflo~
#>  5 Predicting Wate~ "       ~ "     ~ "   ~  2006 Fedenczuk, L~ 'waterflo~
#>  6 Developing a Sm~ "       ~ "     ~ "   ~  2017 Alenezi, Fai~ 'waterflo~
#>  7 Holistic Workfl~ "       ~ "     ~ "   ~  2011 Zangl, Georg~ 'waterflo~
#>  8 Applying Analyt~ "       ~ "     ~ "   ~  2014 Bravo, Cesar~ 'waterflo~
#>  9 Intelligent Pro~ "       ~ "     ~ "   ~  2011 Khazaeni, Ya~ 'waterflo~
#> 10 Water Productio~ "       ~ "     ~ "   ~  2011 Hermann, Rol~ 'waterflo~
#> # ... with 14 more rows