% KB-Quick-Notes.tex (bibliometrics-quick-notes/bibliometrics-quick-notes.github.io, branch main)
% Options for packages loaded elsewhere.
% \PassOptionsToPackage is issued before \documentclass so that the options
% are already registered whenever the class or a later package loads
% hyperref, url or xcolor.
\PassOptionsToPackage{unicode,hidelinks}{hyperref}
\PassOptionsToPackage{hyphens}{url}
\PassOptionsToPackage{dvipsnames,svgnames,x11names}{xcolor}
%
% Document class: KOMA-Script report.
% The paper size is a4paper so that the class-level setting agrees with the
% \usepackage[a4paper]{geometry} call further down in this preamble; the
% previous value (letterpaper) contradicted it.
\documentclass[
  a4paper,
]{scrreprt}
% Colour support (named-colour sets are requested via \PassOptionsToPackage
% above) and AMS math packages.
\usepackage{xcolor}
\usepackage{amsmath,amssymb}
\setcounter{secnumdepth}{-\maxdimen} % remove section numbering
% Engine-dependent encoding and font setup. iftex provides \ifPDFTeX.
\usepackage{iftex}
\ifPDFTeX
  % pdfTeX: 8-bit font encoding plus UTF-8 input.
  \usepackage[T1]{fontenc}
  \usepackage[utf8]{inputenc}
  \usepackage{textcomp} % provide euro and other symbols
\else % if luatex or xetex
  \usepackage{unicode-math} % this also loads fontspec
  \defaultfontfeatures{Scale=MatchLowercase}
  \defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1}
\fi
\usepackage{lmodern}
% The named fonts below are only selected under XeTeX/LuaTeX; under pdfTeX
% nothing in this block runs.
% NOTE(review): \setmathfont is given "Atkinson Hyperlegible Mono", which
% looks like a text font rather than an OpenType math font -- confirm that
% inline math such as \(1:b:b^2\) renders as intended.
\ifPDFTeX\else
  % xetex/luatex font selection
  \setmainfont[]{Atkinson Hyperlegible Next}
  \setsansfont[]{Atkinson Hyperlegible Next}
  \setmathfont[]{Atkinson Hyperlegible Mono}
\fi
% Use upquote if available, for straight quotes in verbatim environments
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
% microtype is optional as well: it is loaded only when microtype.sty exists.
\IfFileExists{microtype.sty}{% use microtype if available
  \usepackage[]{microtype}
  \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
}{}
% Paragraph separation.
% \KOMAClassName is tested to tell KOMA-Script classes from other classes:
%   - non-KOMA class: load parskip if parskip.sty exists, otherwise set
%     \parindent to 0pt and \parskip to a stretchable 6pt by hand;
%   - KOMA class: use the class's own parskip=half option.
% This file loads scrreprt, so the KOMA branch is presumably the one taken
% -- TODO confirm.
\makeatletter
\@ifundefined{KOMAClassName}{% if non-KOMA class
  \IfFileExists{parskip.sty}{%
    \usepackage{parskip}
  }{% else
    \setlength{\parindent}{0pt}
    \setlength{\parskip}{6pt plus 2pt minus 1pt}}
}{% if KOMA class
  \KOMAoptions{parskip=half}}
\makeatother
% Make \paragraph and \subparagraph free-standing
% For each of the two commands (and only if it is defined) the original is
% saved under \old..., and the command is redefined to dispatch on a
% following star via \@ifstar. Both the starred and the unstarred helper call
% the saved original with the heading text and then append an empty \mbox{}.
% NOTE(review): the \mbox{} presumably gives the run-in heading something to
% attach to so that the following text starts on its own line -- confirm
% against the pandoc LaTeX template.
% NOTE(review): the two \renewcommand bodies begin with an unprotected line
% end, i.e. a space token precedes \@ifstar; this looks harmless but could be
% suppressed with a trailing %.
\makeatletter
\ifx\paragraph\undefined\else
  \let\oldparagraph\paragraph
  \renewcommand{\paragraph}{
    \@ifstar
      \xxxParagraphStar
      \xxxParagraphNoStar
  }
  \newcommand{\xxxParagraphStar}[1]{\oldparagraph*{#1}\mbox{}}
  \newcommand{\xxxParagraphNoStar}[1]{\oldparagraph{#1}\mbox{}}
\fi
\ifx\subparagraph\undefined\else
  \let\oldsubparagraph\subparagraph
  \renewcommand{\subparagraph}{
    \@ifstar
      \xxxSubParagraphStar
      \xxxSubParagraphNoStar
  }
  \newcommand{\xxxSubParagraphStar}[1]{\oldsubparagraph*{#1}\mbox{}}
  \newcommand{\xxxSubParagraphNoStar}[1]{\oldsubparagraph{#1}\mbox{}}
\fi
\makeatother


% Table support: multi-page tables (longtable), rule commands (booktabs) and
% extended column specifications (array).
\usepackage{longtable,booktabs,array}
\usepackage{calc} % for calculating minipage widths
% Correct order of tables after \paragraph or \subparagraph
% etoolbox's \patchcmd replaces the first \par inside \longtable with a
% version that first inserts an empty \mbox{} when \if@noskipsec is true.
% Both the success and the failure hook are empty, so a failed patch is
% silent.
\usepackage{etoolbox}
\makeatletter
\patchcmd\longtable{\par}{\if@noskipsec\mbox{}\fi\par}{}{}
\makeatother
% Allow footnotes in longtable head/foot
% footnotehyper is preferred when installed; footnote is the fallback.
\IfFileExists{footnotehyper.sty}{\usepackage{footnotehyper}}{\usepackage{footnote}}
\makesavenoteenv{longtable}
\usepackage{graphicx}
\makeatletter
% \pandocbounded{<content>}: typeset <content> no larger than the available
% text area.
%   1. The content is stored in the save box \pandoc@box.
%   2. \@tempa receives \textheight divided by the box's total height
%      (height + depth); \@tempb receives \linewidth divided by the box width.
%   3. The smaller of the two ratios is kept in \@tempa.
%   4. If that ratio is below 1 the box is scaled by it; otherwise the box is
%      output unchanged, so content is shrunk when too large but never
%      enlarged.
\newsavebox\pandoc@box
\newcommand*\pandocbounded[1]{% scales image to fit in text height/width
  \sbox\pandoc@box{#1}%
  \Gscale@div\@tempa{\textheight}{\dimexpr\ht\pandoc@box+\dp\pandoc@box\relax}%
  \Gscale@div\@tempb{\linewidth}{\wd\pandoc@box}%
  \ifdim\@tempb\p@<\@tempa\p@\let\@tempa\@tempb\fi% select the smaller of both
  \ifdim\@tempa\p@<\p@\scalebox{\@tempa}{\usebox\pandoc@box}%
  \else\usebox{\pandoc@box}%
  \fi%
}
% Set default figure placement to htbp
\def\fps@figure{htbp}
\makeatother


\setlength{\emergencystretch}{3em} % prevent overfull lines

% \tightlist sets \itemsep and \parskip to 0pt; the document body calls it at
% the start of compact lists. \providecommand leaves an already existing
% definition untouched.
\providecommand{\tightlist}{%
  \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}


% Page geometry: A4 paper.
\usepackage[a4paper]{geometry}
% Packages used by the title page in the document body:
%   ccicons     -- \ccby licence icon
%   academicons -- \aiOrcid icon, coloured with orcidlogocol defined below
%   ragged2e    -- \justify
%   tikz        -- overlay node that places the logo at the page corner
\usepackage{ccicons}
\usepackage{academicons}
\definecolor{orcidlogocol}{HTML}{A6CE39}
\usepackage{ragged2e}
\usepackage{tikz}
% Load bookmark only if it has not been loaded already.
\makeatletter
\@ifpackageloaded{bookmark}{}{\usepackage{bookmark}}
\makeatother
% Caption-related setup.
% 1. caption is loaded unless it is already present.
% 2. At \begin{document} the names for the table of contents, the lists of
%    figures and tables, and the figure/table caption labels are set to
%    English strings. Each name is redefined when it already exists and
%    newly defined otherwise.
% 3. A "codelisting" float type is declared with the float package: ruled
%    style, default placement "h", auxiliary-file extension "lop", caption
%    label "Listing". When a chapter counter exists, the float is declared
%    with the optional [chapter] argument (numbered within chapters).
%    \listoflistings prints the corresponding list.
\makeatletter
\@ifpackageloaded{caption}{}{\usepackage{caption}}
\AtBeginDocument{%
\ifdefined\contentsname
  \renewcommand*\contentsname{Table of contents}
\else
  \newcommand\contentsname{Table of contents}
\fi
\ifdefined\listfigurename
  \renewcommand*\listfigurename{List of Figures}
\else
  \newcommand\listfigurename{List of Figures}
\fi
\ifdefined\listtablename
  \renewcommand*\listtablename{List of Tables}
\else
  \newcommand\listtablename{List of Tables}
\fi
\ifdefined\figurename
  \renewcommand*\figurename{Figure}
\else
  \newcommand\figurename{Figure}
\fi
\ifdefined\tablename
  \renewcommand*\tablename{Table}
\else
  \newcommand\tablename{Table}
\fi
}
\@ifpackageloaded{float}{}{\usepackage{float}}
\floatstyle{ruled}
\@ifundefined{c@chapter}{\newfloat{codelisting}{h}{lop}}{\newfloat{codelisting}{h}{lop}[chapter]}
\floatname{codelisting}{Listing}
\newcommand*\listoflistings{\listof{codelisting}{List of Listings}}
\makeatother
% The first \makeatletter/\makeatother pair is empty and has no effect.
\makeatletter
\makeatother
% Guarded loads: caption and subcaption are only loaded when not yet present.
\makeatletter
\@ifpackageloaded{caption}{}{\usepackage{caption}}
\@ifpackageloaded{subcaption}{}{\usepackage{subcaption}}
\makeatother
% bookmark is requested again here without a guard; a guarded load of the
% same package appears earlier in this preamble.
\usepackage{bookmark}
\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available
% Typeset URLs in the surrounding text font.
\urlstyle{same}
% PDF metadata and link appearance.
% NOTE(review): hidelinks is passed to hyperref at the top of this file while
% colorlinks=true is set here; presumably the later setting wins -- confirm.
% NOTE(review): linkcolor uses lowercase "blue" whereas the other colours use
% capitalised names (Blue, Maroon) from xcolor's extended name sets --
% confirm that the difference is intended.
\hypersetup{
  pdftitle={KB Quick Notes},
  pdfauthor={Dr.~Stephan Gauch},
  colorlinks=true,
  linkcolor={blue},
  filecolor={Maroon},
  citecolor={Blue},
  urlcolor={Blue},
  pdfcreator={LaTeX via pandoc}}


% Document metadata; the date is deliberately left empty.
\title{KB Quick Notes}
\author{Dr.~Stephan Gauch}
\date{}
\begin{document}
\begin{titlepage}
\tikz [remember picture, overlay] %
\node [shift={(1cm,-1cm)}] at (current page.north west) %
[anchor=north west] %
{\includegraphics[scale=.15]{images/KB_logo_black-light.jpg}};

  \vfill
  \centering
  {\Huge \bfseries KB Quick Notes \par}
  \vfill
  {\Large
   Dr.~Stephan Gauch \href{https://orcid.org/0000-0002-4715-5400}{\textcolor{orcidlogocol}{\aiOrcid}}\ \par
   {\large Robert K. Merton Zentrum für Wissenschaftsforschung \par
   Humboldt-Universität zu Berlin }}
  \vfill
  \justify
  \ccby \\ This work is licensed under a \href{https://creativecommons.org/licenses/by/4.0/deed.en}{Creative Commons Attribution 4.0 License}
\end{titlepage}

\renewcommand*\contentsname{Table of contents}
{
\hypersetup{linkcolor=}
\setcounter{tocdepth}{2}
\tableofcontents
}

\bookmarksetup{startatroot}

\chapter{What is all this?}\label{what-is-all-this}

In the legal professions, there is a concept that in German carries the
wonderful name ``Loseblattsammlung'' (aka looseleaf binder, often as
part of a subscription then called looseleaf service). You pay for the
joy of receiving bundles of notes on some legal topic or issue, that you
then may neatly file away in a special folder in order to be ``up to
date'' on all the relevant facts and concerns surrounding said topic. In
a way, what you are currently looking at is also some sort of a
looseleaf binder, in that it aims to inform you on things. Yet, in
contrast to being ``all up to date all the time,'' it is rather
something that should get you ``up to speed'' in that it gives you a
starting point for a subset of topics and concepts in bibliometrics.
Also, there is no real service to subscribe to. Rather, these notes are
a starting point of starting points.

\section{But\ldots{} why?}\label{but-why}

``Why not write a proper textbook?'' you might say. And yeah, sure,
thanks, great suggestion! I probably could give a number of excuses. But
rather than those, I give you this. Also, I thought it would be best to
have something to just jump in without prior training in order to set a
basis for an inclusive discourse about the concepts and ideas discussed.
Having recently screened the state of bibliometrics in use, I see a lot
of contributions that would probably not qualify for publication in a
bibliometric core journal. I don't think that is a problem in itself. I
also don't subscribe to the notion that authors of such contributions
are per se ``incompetent'' or just have to ``see the light'' or should
be prevented from trying new things. Not necessarily (only) for moral
reasons---the latent elitism should be obvious---but for pragmatic ones.
There is a lot of talk about being ``responsible'' when working with
metrics. That's true for sure, right? Yet, it is also quite funny
in my book, since one probably would have a bit of a hard time to find
vocal advocates for a) the ``irresponsible'' use of metrics, or, for
that matter, b) ``meaningless'' metrics or c) measuring what is
``irrelevant''. So these pleas don't really do much on their own.
Anyway! Rather than thinking of commandments, I opted for this:
Providing a simple starting point to allow interested newcomers to join
the party.

\section{Who (I think) might enjoy reading
this?}\label{who-i-think-might-enjoy-reading-this}

Well, first, foremost and obviously, anyone who wants to know what
bibliometrics is about.

Obvious candidates are:

\begin{itemize}
\tightlist
\item
  Students who do not have access to an introductory bibliometrics course
  and want to start learning.
\item
  Students that do have access to an introductory bibliometrics course,
  but struggle with getting a grip on the basic concepts and how they
  relate to each other.
\item
  Practitioners and administrators who plan to use bibliometrics in
  their work and need a starting point (sic!) on what's what.
\item
  Policymakers who want to engage in a discourse with the bibliometric
  community and connect on the level of language use, what concepts
  bibliometricians believe to be relevant and why they believe that.
\item
  Anyone who wants to better understand a bibliometric study.
\end{itemize}

\section{Where to start with the Quick
Notes?}\label{where-to-start-with-the-quick-notes}

Is there a structure to the individual notes? Do they have to be read in
a particular order? Yes, and no. No, you may read this in any order you
see fit. Yes, starting with the basics might be a good idea, but I guess
you would have figured that out on your own and would not need a cue for
that. Also yes, I tend to bracket bibliometrics into the following
territories and did so in the Quick Notes as well.

\textbf{Evaluative bibliometrics}, the territory of the vertical, where
the tedious repetitiveness lives. What is ``countable'', based on an
argument of shared characteristics, e.g.~one citation being just like
any other citation, is stacked up to find out who are the ``best of the
best of the really very, very bestest''. For better or worse as numerous
reform initiatives suggest.

\textbf{Explorative bibliometrics} is the territory of the horizontal.
The realm of mapping and charting the old and the new or finding some
path from one peculiarity to another. How do topics evolve? Who
cooperates with whom? These sorts of things. The Explorative and the
Evaluative are quite neighborly, since you can try to translate many
horizontal phenomena into vertical signifiers of ``value'' or
``quality''. A bibliometric mapping exercise can speak both for
``success in community building'' and for ``representation of a
community''. Hence, borders here are sometimes rather loosely defined.
Yet, both can have rather different ideas of what is ``relevant'' or
``adequate''.\footnote{Using a ``citation window'' means counting the
  incoming citations to a contribution within a pre-defined period of time
  in order to prevent unfair comparison of old and new publications.
  After all, the old ones had more time to get cited. Makes sense in
  evaluation, but probably less so when the aim is to explore or map
  something. The stance a) ``We need to compare fairly and
  therefore have to control for unfair advantage of old publications!''
  does not play well with b) ``Why cut off a part of the past of a
  field, when your interest is in the past of a field?''. There is, of
  course, a Quick Note on Citation Windows.}

Both of these territories share that they work ``with'' databases in
some way or another.

\textbf{Curative bibliometrics} is the territory responsible for keeping
things in order. Pruning, weeding things out, naming all the things
properly, etc. In practical terms: Making sure that mapping and counting
can be done ``adequately'' and in a proper orderly fashion. Obviously,
this is less working with the database but rather ``on'' the databases.

Finally, \textbf{reflexive bibliometrics}, a territory that I concern
myself with the most nowadays, is a home for those who do not
necessarily(!) work with databases or not necessarily(!) work on
databases---usually they can do both, too---but try to understand how
working with and working on databases shapes what bibliometricians do,
what others do with what bibliometricians do, and what others,
e.g.~researchers, do when observing what other others do, e.g.~funders,
with things that bibliometricians do and how all of this plays together.
So, inhabitants are rather interested in the idea of working on the
profession of bibliometrics and performativity of bibliometrics,
i.e.~how measures or maps rather ``produce'' what they seek to ``show''.

Anyway. I digress, again. Let's move on.

\section{How the Quick Notes are
structured}\label{how-the-quick-notes-are-structured}

The notes themselves definitely have an internal structure. All
documents start with a ``What is this about'' section, giving a rough
idea what concept the note is about. Next, you will find some sort of a
situating passage. Why is that concept important in bibliometrics? Where
applicable, you will find a passage on ``How does this work?'' giving
some verbalized idea of the procedures involved.

The Quick Notes do NOT contain code. Preposterous! Where is the
tutorial? Isn't bibliometrics also a craft? Of course it is! And there
might be more to come. Eventually. I guess. Anyway!

The last section contains limitations and critique towards or at least
related to the concept. These are not the result of a careful screening
exercise but rather me channeling the field. Last but not least:
References. Another starting point. Obviously, that selection is
illustrative and not exemplary.

Another aspect that structures the Quick Notes is their length. They are
almost insultingly short given the glorious purpose an ambitious reader
might throw at them. Needless to say: Each is a rabbit hole. A
bottomless pit of sorts. Each and every one of the topics makes for a
quite wonderful fetish. Consider these notes a shovel, at maximum a very
tiny and short ladder. The absolute minimum that these should achieve
though is getting you, dear reader, to be able to discuss these issues
with those that are more versed and have already dug a deeper hole for
themselves. The absolute maximum is getting you hooked on one or more of
these fetishes.

\section{That's an awful lot of starting points and not much of a
conclusion}\label{thats-an-awful-lot-of-starting-points-and-not-much-of-a-conclusion}

Indeed. This is intentional and pretty much by design. These notes are
supposed to leave you, dear reader, a tad unfulfilled. So it's not an
all-inclusive trip but rather a badly drawn map on a napkin to instill
the recklessness of, dare I say, adventure. Also, you might have
realised that the Quick Notes don't have a DOI. This is not a
coincidence! You should, for multiple reasons, not reference these
notes. First of all, they are, for reasons argued above and didactic
purposes, shallow, dramatically short, on the brink of being
oversimplified. The aim is to have you understand the basics. So, rather
than referencing these notes, I would rather have them shared.

Some final remarks: Why did you not make this a proper Wiki? Well, all
of this is licensed CC-BY. Go forth and wreak havoc. Why did you not
cover {[}insert your favourite fetish here{]}? Great question! Thank
you! Can I contribute to this or write a Quick Note myself? Color me
delighted! For the moment, I guess you might just contact me about this:
\href{mailto:stephan.gauch@hu-berlin.de}{\nolinkurl{stephan.gauch@hu-berlin.de}}.

Anyway.

Enjoy the ride.

Berlin, 18 Jul 2025

\section{Acknowledgments}\label{acknowledgments}

I want to thank a lot of people. First of all, the Working Group
``Competence development'' of the
\href{https://bibliometrie.info/en/}{Competence Network Bibliometrics
(KB)}, who participated in the selection of and discussion on these
topics. I also would extend my thanks to Sophia Dörner, Beatrice
Yefimov, and Najko Jahn, who supported the finalisation and polishing of
these Quick Notes. All the remaining shortcomings that survived these
scrutinous efforts are probably, no, possibly, intentional but in any
case my own.

The development of these Quick Notes was partially funded by the Federal
Ministry of Research, Technology and Space (BMFTR) as part of the
VaMoKo project (FKZ 16WIK2101D).

The material is free to use and re-use, licensed under the
\href{https://creativecommons.org/licenses/by/4.0/deed.en}{CC BY 4.0
License}.

\part{Basics}

\chapter{Bradford's Law of
Scattering}\label{bradfords-law-of-scattering}

\section{What is Bradford's Law
about?}\label{what-is-bradfords-law-about}

Bradford's Law, formulated by Samuel C. Bradford in 1934, is a pattern
often found in bibliometric studies, namely that in bibliometrics pretty
much everything is distributed \emph{log-normal}, i.e., among other
characteristics, heavily right-skewed. The law describes the scatter or
dispersion of scientific literature: if scientific journals are
arranged in order of decreasing productivity of articles on a given
subject, they may be divided into a nucleus of journals (so-called core
journals) more particularly devoted to the subject and several groups or
zones containing the same number of articles as the nucleus, while the
journals become increasingly less representative of the subject as a whole,
i.e.~they contain fewer works related to a specific matter. Bradford's
law then predicts the number of journals within the nucleus and zones.
The relationship of size of the nucleus and the zones is, at least
according to the original paper by Bradford: \(1:b:b^2\). This may not
be universally applicable today due to changes in the scientific
publication landscape. Yet, in principle, the concept still holds up in
the way Bradford's Law is famously summarized as:

\begin{quote}
A small core of journals will account for the majority of significant
scientific papers.
\end{quote}

\section{Why is Bradford's Law
important?}\label{why-is-bradfords-law-important}

Bradford's Law has been important for understanding and managing the
expanse of scientific literature, assisting librarians and information
scientists in identifying influential journals in a particular field,
thus enabling allocation of resources for library collections. In the
context of practical applications it may therefore support resource
management in libraries by focusing on core journals. It also aids
researchers in targeting the most focused journals for their work, both
for reading and publication purposes. In this sense, it is also
important for both evaluative and exploratory bibliometrics. Especially
when aiming to evaluate or explore a thematic field, be it a discipline
or a topic, using, at least in part, a journal-based strategy,
Bradford's Law will highlight both the strength and the weakness of a
journal-based approach. First and foremost, Bradford's Law provides an
excellent argument to focus on core journals to produce an effective
query for delineating a field or topic as it helps in identifying the
most productive journals quickly and at the same time provides an
argument as to why there are limits to journal-based strategies: following
the ratio \(1:b:b^2\), the 1st zone already contains \(b\) times as many
journals as the core, and the 2nd zone contains \(b^2\) times as many.
The strengths in relying on
Bradford's Law to design effective queries are that a journal-based
strategy will produce a vast number of results very quickly by
identifying the core journals, perhaps together with domain experts on
the subject in question. This benefit is usually in part qualified by
publisher practices aiming to bundle journals into subscription packages
providing marginal reductions when choosing said packages over
individual subscriptions. Finally, Bradford's Law may be used to give a
rough prediction of a field's size. Following the general rule of
relationship between the 1st and 2nd zones, very rough assumptions can
be made about how \emph{large} a scientific field might be.

\section{Limitations}\label{limitations}

\subsubsection{\texorpdfstring{Sticking to Bradford's Law will make your
query more
\emph{mainstream}}{Sticking to Bradford's Law will make your query more mainstream}}\label{sticking-to-bradfords-law-will-make-your-query-more-mainstream}

The main weaknesses lie in coverage. Over-reliance on Bradford's Law
will produce results that may over-represent the \emph{mainstream} of a
topic or field. In a similar vein, gearing search strategies
toward Bradford's Law may not be possible for rapidly emerging topics or
fields, or for those with high degrees of interdisciplinarity, where the
publication strategies of authors may be geared toward more generalized
outlets because specialized journals are not yet available in
early stages.

\subsubsection{Bradford's Law might be driven by multidisciplinary
applications}\label{bradfords-law-might-be-driven-by-multidisciplinary-applications}

Another limitation of Bradford's Law lies in the subjectivity of both
article classification and journal selection. Especially in the
long-tail of the distribution a single article in a journal will
contribute to increasing the skewness of the overall distribution. In
the case of bibliometrics, which is increasingly being used as a basis
for tracing actual developments of fields or topics, e.g.~in the context
of systematic reviews, a large number of papers are \emph{applications}
of bibliometrics rather than bibliometric research in the stricter
sense.

\subsubsection{Bradford's Law is (in some sense) field
dependent}\label{bradfords-law-is-in-some-sense-field-dependent}

Bradford's Law may not apply uniformly across different subjects or
disciplines. How it diverges from the ideal constants and coefficients
may depend on a number of characteristics, e.g.~a field's age, the
application orientation of a field, tendencies for interdisciplinarity,
rhythms of scientific outputs, funding, etc. The digital age and open
access movements have further altered the publication landscape and
access patterns, which may also further affect the applicability and
usefulness of Bradford's Law.

\subsubsection{Bradford's Law abstracts from
quality}\label{bradfords-law-abstracts-from-quality}

Bradford's law makes absolutely no claim about quality. Scholars might
jump to the conclusion that articles in the fringes, i.e.~the 2nd zone,
may always be of \emph{low quality}. Even though anecdotal evidence
might reflect this, especially in fields of high
application-orientation, this cannot qualify as a general rule. One
prominent example might be the Hirsch-Index, a widely used measure of the
productivity and impact of a researcher in their field. The article
introducing the Hirsch-Index was published in Proceedings of the
National Academy of Sciences (PNAS). Definitely not one of the core
journals, an influential paper nonetheless.

\section{Further Reading}\label{further-reading}

Bailón-Moreno, R., Jurado-Alameda, E., Ruiz-Baños, R., \& Courtial, J.
P. (2005). Bibliometric laws: Empirical flaws of fit.
\emph{Scientometrics, 63}(2), 209--229.
\url{https://doi.org/10.1007/s11192-005-0211-5}

Bradford, S. C. (1934). Sources of information on specific subjects.
\emph{Engineering, 137}(3550), 85--86.

Glänzel, W., \& Thijs, B. (2012). Using `core documents' for detecting
and labelling new emerging topics. \emph{Scientometrics, 91}(2),
399--416. \url{https://doi.org/10.1007/s11192-011-0591-7}

Mutschke, P., \& Mayr, P. (2015). Science models for search: A study on
combining scholarly information retrieval and scientometrics.
\emph{Scientometrics, 102}(3), 2323--2345.
\url{https://doi.org/10.1007/s11192-014-1485-2}

Nicolaisen, J., \& Hjørland, B. (2007). Practical potentials of
Bradford's law: A critical examination of the received view.
\emph{Journal of Documentation, 63}(3), 359--377.
\url{https://doi.org/10.1108/00220410710743298}

Shenton, A. K., \& Hay-Gibson, N. V. (2011). Bradford's Law and its
relevance to researchers. \emph{Education for Information, 27}(4),
217--230. \url{https://doi.org/10.3233/EFI-2009-0882}

\chapter{Citation Windows}\label{citation-windows}

\section{What is a Citation Window?}\label{what-is-a-citation-window}

A citation window refers to the specific time period during which
citations to a contribution are counted and analyzed. This window can
range from a few years to several decades. In practice, however, the
periods for citation windows typically range between 2 and 5 years.

\section{Why is it Important?}\label{why-is-it-important}

Using citation windows is important because they directly influence the
calculation of impact of a contribution. They achieve this by addressing
a specific notion of \emph{fairness} that relates to the comparison of
recent and old research. The first argument for citation windows is that
old articles had more time to accumulate citations and therefore have an
unfair advantage over recent ones. The second argument is that in order
for a scientific article to be referenced at all, it usually needs a
certain amount of time to get recognised in the publication landscape.
The rather idealized idea is that an article is being read, has some
relevance in some research process, the research itself is being
performed, the article is being referenced in a manuscript, which again
is submitted to a journal and goes through rounds of peer review until
finally, after publication, and some additional time to be included in a
bibliographic database, the original contribution we are interested in,
has attracted a citation. Obviously, different fields have different
speeds at which research is cited. For instance, in rapidly evolving
fields like information technology, shortening the citation window may
be more helpful, while in disciplines with longer research cycles, such
as history, longer windows may be necessary.

\section{How Does it Work?}\label{how-does-it-work}

The length of a citation window strongly depends on the task at hand,
the requirements toward immediacy or the research question as well as
the practices of the field in question. Citations are then collected and
analyzed only within this predefined period. For instance, a 5-year
citation window starting from the year of publication means that only
citations received within those five years are considered in the
analysis. This information is not always part of publicly available
data.

\section{Limitations}\label{limitations-1}

\subsubsection{Does not account for slow
burns}\label{does-not-account-for-slow-burns}

One of the primary issues with citation windows is the risk of
misrepresenting the impact of research. Short windows may not capture
the long-term influence of a contribution, especially in fields where
citations accumulate slowly. Such \emph{sleeping beauties} might receive
little attention at first but a surge in reception after some time. If
this period is no longer within the citation window, this reception is
ignored.

\subsubsection{The longer the citation window - the older (and less
relevant) the
results}\label{the-longer-the-citation-window---the-older-and-less-relevant-the-results}

Longer windows may include citations that are less relevant to the
current state of the field, due to different rhythms of scientific
output. In some fields publication propensity favors shorter time frames
from research to publishing. This effect may even be catalyzed further
when considering differences in project or research process length. In
fields where rhythms are tight, longer citation windows might cover
things that are no longer relevant. Yet, context matters,
i.e.~distinctions have to be made between citation windows as a means to
discount relevance vs.~a means of preventing measures from being overly
influenced by matters of temporality.

\subsubsection{Citation windows might not fit the notion to be evaluated
against}\label{citation-windows-might-not-fit-the-notion-to-be-evaluated-against}

Even though citation windows are useful for matters of comparison, it may
not be sensible to use them when temporality is explicitly part of the
concept to evaluate against. One example can be the Hirsch-Index, which
puts a strong premium on \emph{breadth of impact}, which in turn makes
it a bad measure to use in the context of evaluation of junior
scientists. In the case of the Hirsch-Index the notion of \emph{breadth}
does not match up to the measurement logic of restricting temporality.
Similarly other evaluations that somehow address notions of
\emph{long-termedness} will not benefit from citation windows.

\subsubsection{Citation windows might be counterproductive for
exploratory
purposes}\label{citation-windows-might-be-counterproductive-for-exploratory-purposes}

Citation windows make intuitive sense in the context of evaluative
bibliometrics. In exploratory bibliometrics they might be
counterproductive, for instance when questions of evolution or
dynamics of a field are relevant to the purpose of exploring. More often
than not, exploratory bibliometrics involves understanding the
\emph{origin story} of a field or topic. Where did it come from? How did
it emerge? In this case limiting analysis by using citation windows may
eliminate just these interesting classics, e.g.~from co-citation
analyses.

\subsubsection{Citation windows only address one
issue}\label{citation-windows-only-address-one-issue}

Time isn't the only factor that influences citation counts. Document
type is another very plausible candidate with review papers usually
receiving a considerable premium on citations flowing in. Moreover, the
choice of a citation window can introduce bias, as it might favor
certain types of publications or disciplines over others. This might not
be a strong limitation or argument against the use of citation windows
but should just be a reminder that citation windows do not fix all
potential biases.

\subsubsection{Citation windows may be inherently
inaccurate}\label{citation-windows-may-be-inherently-inaccurate}

When calculating citation windows analysts have to be aware of what
information specifically the citation window is calculated on.
Sometimes, when calculation is performed using the publication year,
publications from early months in the year may receive a substantial
premium. Using the actual publication date might change the story quite
a bit.

\subsubsection{Be aware of difference in citation
windows}\label{be-aware-of-difference-in-citation-windows}

There is no universal standard when it comes to citation window length.
When interpreting or merging data the mere information that a citation
window has been applied may therefore be insufficient. There's also the
challenge of comparing studies using different citation windows, which
can lead to inconsistencies in interpretation of indicators.

\section{Further Reading}\label{further-reading-1}

Campanario, J. M. (2011). Empirical study of journal impact factors
obtained using the classical two-year citation window versus a five-year
citation window. \emph{Scientometrics, 87}(1), 189--204.
\url{https://doi.org/10.1007/s11192-010-0334-1}

Donner, P. (2018). Effect of publication month on citation impact.
\emph{Journal of Informetrics, 12}(1), 330--343.
\url{https://doi.org/10.1016/j.joi.2018.01.012}

Glänzel, W. (2004). Towards a model for diachronous and synchronous
citation analyses. \emph{Scientometrics, 60}(3), 511--522.
\url{https://doi.org/10.1023/B:SCIE.0000034391.06240.2a}

Glänzel, W., Schlemmer, B., \& Thijs, B. (2003). Better late than never?
On the chance to become highly cited only beyond the standard
bibliometric time horizon. \emph{Scientometrics, 58}(3), 571--586.
\url{https://doi.org/10.1023/B:SCIE.0000006881.30700.ea}

Wang, J. (2013). Citation time window choice for research impact
evaluation. \emph{Scientometrics, 94}(3), 851--872.
\url{https://doi.org/10.1007/s11192-012-0775-9}

\chapter{Coverage in Bibliometrics}\label{coverage-in-bibliometrics}

\section{What does Coverage mean in
Bibliometrics?}\label{what-does-coverage-mean-in-bibliometrics}

In bibliometrics, \emph{coverage} refers to the extent to which a
bibliographic database (or tool) includes relevant publications,
journals, conferences, and other scholarly outputs within a specific
field or across multiple disciplines. Coverage can be evaluated in terms
of the breadth (range of subjects, disciplines, or publication types)
and depth (historical range, level of detail in indexing) of the
included materials. Effective coverage is crucial for comprehensive
bibliometric analyses, as it directly impacts the accuracy and
reliability of the results derived from the data. Besides the notion of
coverage from a disciplinary perspective, it can also refer to the
occurrence of missing data on the level of individual records in a
database, which may also be referred to as \emph{completeness}.

\section{Why is it Important?}\label{why-is-it-important-1}

There is no complete database of scientific publications including all
the features that would qualify current bibliometric analyses. In that
sense all databases are defined by a focus, some might say bias, towards
specific criteria of inclusion. Both notions of coverage have
substantial implications for bibliometric analyses. For instance,
adequate coverage ensures a more complete and accurate representation of
the scholarly landscape, which in turn enables more valid comparisons
across different fields, institutions, or time periods. Despite its
importance for bibliometrics, balancing quality (or, more precisely and
more problematically, citation counts) vis-à-vis quantity or
completeness might not be as straightforward as one might expect in
matters of coverage. Among the current considerations are matters of
post-colonial concern, e.g.~contributions by the \emph{periphery} of the
science system such as the global south, with some scholars rejecting
the notion of a primacy of the center vs.~the periphery. Another aspect
that problematizes coverage in this way is the identification of trends and
gaps, where higher coverage assists in identifying emerging research
trends and potential gaps in the literature. The issue therefore is more
complex than \emph{the more, the merrier}.

Evaluating and ensuring coverage usually involves multiple aspects, such
as:

\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
  Assessment of Bibliometric Sources: Examining the scope and extent of
  bibliometric databases and tools to determine their coverage.
\item
  Transparent Data Inclusion Criteria: Defining criteria for what types
  of publications and sources are included.
\item
  Regular Updates and Expansion: Continuously updating and expanding
  databases to include new publications, journals, and other relevant
  scholarly outputs.
\item
  Cross-Database Comparison: Comparing data across multiple bibliometric
  sources to identify coverage overlaps and gaps.
\end{enumerate}

\section{How Does it Work?}\label{how-does-it-work-1}

Coverage is a concept that is more a matter of consideration than
something that can be \emph{performed} as a method in the stricter
sense. More often than not, this is due to the commercial nature of
bibliographic data and available resources. With databases from the
Web-of-Science-Family the criteria usually will be a derivative of
citations over time relative to publication volume on the level of
journals taking into account the disciplinary focus of said journals,
a.k.a.~\emph{quality}. Scopus seemingly aims for more of a balance
between quality and quantity. Yet, in both cases, how, when and why a
journal is included is not overly transparent. Moreover, which journals
are being \emph{put to review} is unclear as well, rendering the overall
composition a bit of a mystery at times. Descriptions of working groups
or committees in part exist, yet these are also only moderately
informative toward the issue. The prevalence of focus is not just
present in established databases but also extends to current contenders,
sometimes at the cost of the second notion of coverage. The platform
\emph{Dimensions} features a larger collection of articles leading to an
overall larger citation network but has issues with completeness.

\section{Limitations}\label{limitations-2}

\subsubsection{Adequate Coverage is
pricey}\label{adequate-coverage-is-pricey}

Achieving and maintaining extensive coverage requires significant
resources and effort. This is especially true when understanding
coverage as a dynamic concept rather than a static silo logic at a
definite point in time. Main drivers are changes in the topic, field and
publication landscape.

\subsubsection{Coverage is politics by other
means}\label{coverage-is-politics-by-other-means}

Coverage can be biased towards certain languages, regions, or
disciplines, leading to a skewed picture from both an evaluative and
an exploratory perspective. Keeping up with the rapidly evolving
landscape of scholarly publications is a continuous challenge.

\subsubsection{Coverage is exclusion}\label{coverage-is-exclusion}

Coverage should not be confused with unreflected inclusion. Selecting
journals, repositories, hubs or articles always involves a judgment,
even if done \emph{100\% algorithmically}. In this sense,
coverage can (and maybe should) be understood more along the lines of
curation. Extensive coverage may result in an overwhelming amount of
garbage data, complicating analysis and interpretation.

\section{Further Reading}\label{further-reading-2}

Klein, D. B., \& Chiang, E. (2004). The Social Science Citation
Index: A Black Box---With an Ideological Bias? \emph{Econ Journal Watch,
1}(1), 134--165.

Harzing, A.-W., \& Alakangas, S. (2016). Google Scholar, Scopus and the
Web of Science: A longitudinal and cross-disciplinary comparison.
\emph{Scientometrics, 106}(2), 787--804.
\url{https://doi.org/10.1007/s11192-015-1798-9}

Larsen, P. O., \& Von Ins, M. (2010). The rate of growth in scientific
publication and the decline in coverage provided by Science Citation
Index. \emph{Scientometrics, 84}(3), 575--603.
\url{https://doi.org/10.1007/s11192-010-0202-z}

Mongeon, P., \& Paul-Hus, A. (2016). The journal coverage of Web of
Science and Scopus: A comparative analysis. \emph{Scientometrics,
106}(1), 213--228. \url{https://doi.org/10.1007/s11192-015-1765-5}

Stahlschmidt, S., \& Stephen, D. (2020). Comparison of Web of Science,
Scopus and Dimensions databases. Berlin: Deutsches Zentrum für
Hochschul- und Wissenschaftsforschung.
\url{https://bibliometrie.info/downloads/DZHW-Comparison-DIM-SCP-WOS.PDF}

\chapter{Institutional Disambiguation in
Bibliometrics}\label{institutional-disambiguation-in-bibliometrics}

\section{What does disambiguation mean in
Bibliometrics?}\label{what-does-disambiguation-mean-in-bibliometrics}

Disambiguation of affiliation data refers to the process of accurately
identifying and distinguishing the institutional affiliations of authors
in scholarly publications. This involves resolving ambiguities and
variations in the way institutions are named or represented in
publication data. For instance, an institution could be referred to by
different names or acronyms, or multiple departments within a single
institution could be listed separately. The aim is to ensure that each
publication is correctly attributed to the right institution. This also
includes diachronic changes, i.e.~affiliations splitting up or merging,
like in the case of the Karlsruhe Institute of Technology or (at least
temporarily) the Berlin Institute of Health. Apart from institutional
disambiguation, similar principles are being applied to disambiguate
author names, which usually proves to be a significantly harder
challenge for various reasons.

\section{Why is it Important?}\label{why-is-it-important-2}

Disambiguation is the prerequisite for accurate attribution. This aspect
might be the simplest reason why disambiguation is important in
bibliometrics: to ensure correct attribution of research output to
institutions, crucial for institutional rankings, reputation, and
funding. The notion of accuracy of attribution is not limited to
evaluative bibliometrics such as productivity assessments and impact
evaluations. Rather, it also is relevant, maybe even more relevant, in
exploratory bibliometrics, e.g.~in the context of analyses of
collaboration networks. Yet, disambiguation also provides further
benefits beyond bibliometrics, e.g.~in the context of in-house library
and resource management of organizations. Finally, disambiguation
supports overall research visibility of an organization by accurately
showcasing an institution's research contributions as well as enhancing
overall addressability, aiding in visibility and recognition in
industry, policy and the academic communities. All in all,
disambiguation provides the basis for fairer and more meaningful
comparisons between institutions and is indispensable for providing
clean and high quality data for policy-making and strategic decisions in
research management.

\section{How Does it Work?}\label{how-does-it-work-2}

There are numerous disambiguation approaches. Some will focus on the use
of structure-detection using multiple pieces of information. Others will
focus on fuzzy matching incorporating spelling errors. Other approaches
again might focus on so-called \emph{master list approaches} that are
basically extensively curated thesauri of spelling variants. Other
approaches again aim to use complementary data, such as WikiData, to
clean institution strings. In the Competence Network Bibliometrics the
current approach is to use regular expressions, basically matching for
delicately definable patterns rather than \emph{plain} searches, to
define an enormous set of positive and negative rules on how institution
strings are consolidated. These rules are applied to clean, if possible,
to the level of institutes and then aggregate back to the level of the
institution as a whole including start and end dates to account for
changes in institutional setups. Most of the approaches mentioned have a
quite similar workflow, which includes the following steps.

\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
  Data Collection: Gathering affiliation data from consolidated data
  sources or publication records.
\item
  Identification of Variations: Recognizing different variations and
  representations of the same institution (differences based on the
  approach chosen).
\item
  Standardization and Matching: Standardizing the names and details of
  institutions and matching different variants to a single, standardized
  form.
\item
  Continuous Updating: Regularly updating the disambiguation process to
  accommodate new institutions, mergers, name changes, etc.
\end{enumerate}

The recursiveness and circular nature of this idealized approach should
make it clear that disambiguation usually is not a \emph{one-shot} task
but rather a continuous effort. Yet, for specific approaches and
questions using smaller datasets it may be admissible to use combined
approaches including structure detection and manual inference to arrive
at high quality data.

\section{Limitations}\label{limitations-3}

\subsubsection{All data is dirty all the
time}\label{all-data-is-dirty-all-the-time}

Disambiguation can quickly become a highly complex and
resource-intensive task. Assuming that commercial, or non-commercial for that
matter, databases are sufficient for evaluative or exploratory purposes
might be a gross lapse of judgment. Even though newer approaches aim to
mitigate the problem and some providers try to shift cleaning either
into algorithms, communities or clients, there currently is no database
that is perfectly clean in the regard described above, even though they
might claim so in sales pitches and promotional material.

\subsubsection{Disambiguating data is like feeding a
dragon}\label{disambiguating-data-is-like-feeding-a-dragon}

The process can be complex and resource-intensive, requiring
sophisticated algorithms and expert intervention. All these things are
costly and will require continuous and/or distributed efforts to
achieve. As soon as manual inference comes into play, knowledge about the
national or local organizational landscape both within and beyond
stereotypical \emph{research organizations} is an absolute must! At
least for now, high-quality data simply won't come cheap. Also,
affiliation data is continually changing, making ongoing maintenance a
challenge as well as an imperative.

\subsubsection{Don't expect disambiguation to be standardized
(yet)}\label{dont-expect-disambiguation-to-be-standardized-yet}

Disambiguation will sometimes feature an obscene variability across
sources. How affiliation data is processed by the different data
providers is not consistent or comparable. This lack of standardization
can make integration of data and harmonization efforts a complex task.

\section{Further Reading}\label{further-reading-3}

Daraio, C., Lenzerini, M., Leporelli, C., Naggar, P., Bonaccorsi, A., \&
Bartolucci, A. (2016). The advantages of an ontology-based data
management approach: Openness, interoperability and data quality.
\emph{Scientometrics, 108}(1), 441--455.
\url{https://doi.org/10.1007/s11192-016-1913-6}

Donner, P., Rimmert, C., \& Van Eck, N. J. (2020). Comparing
institutional-level bibliometric research performance indicator values
based on different affiliation disambiguation systems.
\emph{Quantitative Science Studies, 1}(1), 150--170.
\url{https://doi.org/10.1162/qss_a_00013}

Müller, M.-C., Reitz, F., \& Roy, N. (2017). Data sets for author name
disambiguation: An empirical analysis and a new resource.
\emph{Scientometrics, 111}(3), 1467--1500.
\url{https://doi.org/10.1007/s11192-017-2363-5}

Rimmert, C., Schwechheimer, H., \& Winterhager, M. (2017).
Disambiguation of author addresses in bibliometric databases - technical
report. Bielefeld: Universität Bielefeld, Institute for
Interdisciplinary Studies of Science (I²SoS).

Tang, L., \& Walsh, J. P. (2010). Bibliometric fingerprints: Name
disambiguation based on approximate structure equivalence of cognitive
maps. \emph{Scientometrics, 84}(3), 763--784.
\url{https://doi.org/10.1007/s11192-010-0196-6}

\chapter{Precision and Recall}\label{precision-and-recall}