मिडियाविकी:Gadget-Fill Index.js
Appearance
नोंद: साठवून ठेवल्यानंतर बदल पहाण्यासाठी कदाचित तुमच्या ब्राऊजरच्या कॅचेला बायपास करावे लागेल.
- फ़ायरफ़ॉक्स / सफ़ारी: धरुन ठेवा Shift टिचकी मारताना Reload, किंवा हे दाबताना Ctrl-F5 किंवा Ctrl-R (⌘-R मॅकवर)
- गुगल क्रोम: दाबा Ctrl-Shift-R (⌘-Shift-R मॅकसाठी)
- ओपेरा: कडे जा Menu → Settings (ओपेरा → पसंतीक्रम on a Mac) आणि मग गोपनियता आणि सुरक्षा → ब्राउजिंग डाटा काढून टाका → कॅचे छायाचित्रे आणि धारिणी.
/*
* Author: w:fr:Phe
*
* Import the contents of the "Book" template from Commons into the Index
* page fields at Wikisource
*
* Modified: 2020-11-10: More robust template handling to deal with Faebot
* uploads (Inductiveload)
* 2020-11-27: Some simple heuristics to improve IA metadata
* 2021-04-03: Supports authors set with {{creator|wikidata=Qxxxx}}
*/
/* eslint-disable camelcase, one-var, vars-on-top */
( function ( mw, $, Promise ) {
'use strict';
// var FillIndex = {
// };
/**
 * Find the first transclusion of a template in wikitext and split it into
 * its parameters.
 *
 * Nested templates ({{...}}) and links ([[...]]) are tracked on a stack so
 * that a "|" or "=" inside them is not mistaken for a separator of the outer
 * template. HTML comments and nowiki sections are not understood.
 *
 * Only transclusions that have at least one "|" after the template name are
 * found.
 *
 * @param {string} text wikitext to search
 * @param {string} template template name, matched case-insensitively
 * @return {Array} a two-element array [ params, raw ]:
 *  - template not found: [ null, null ]
 *  - template never closed: [ null, text from the opening braces to the end ]
 *  - otherwise: params maps parameter names (first letter upper-cased) to
 *    trimmed values, unnamed parameters get numeric keys (key 0 is the
 *    template name), and raw is the wikitext of the whole transclusion
 */
function parse_template( text, template ) {
	// Escape the name so that characters such as "(" or "." match literally
	var escaped = template.replace( /[.*+?^${}()|[\]\\]/g, '\\$&' ),
		re = new RegExp( '\\{\\{ *' + escaped + '[ \\n]*\\|', 'i' ),
		start = text.search( re );

	if ( start < 0 ) {
		return [ null, null ];
	}

	var closers = { '{{': '}}', '[[': ']]' },
		stack = [],
		params = {},
		pos_param_idx = 0,
		// null until the first top-level "=" of the current parameter is seen
		param_name = null,
		param_content = '',
		index = start;

	// Store the parameter collected so far and start a new one
	function save_param() {
		var name = param_name === null ? '' : param_name.trim(),
			content = param_content.trim();

		if ( name.length === 0 ) {
			// positional parameter (pos=0 is the template name)
			params[ pos_param_idx ] = content;
			pos_param_idx += 1;
		} else {
			params[ name[ 0 ].toUpperCase() + name.slice( 1 ) ] = content;
		}
		param_name = null;
		param_content = '';
	}

	while ( index < text.length ) {
		var pair = text.slice( index, index + 2 );

		if ( pair === '{{' || pair === '[[' ) {
			stack.push( pair );
			// the braces that open the template itself are not content
			if ( stack.length > 1 ) {
				param_content += pair;
			}
			index += 2;
		} else if ( stack.length > 0 && pair === closers[ stack[ stack.length - 1 ] ] ) {
			stack.pop();
			index += 2;
			if ( stack.length === 0 ) {
				// closing braces of the template: keep the final parameter too
				save_param();
				return [ params, text.slice( start, index ) ];
			}
			param_content += pair;
		} else {
			var ch = text[ index ];

			if ( stack.length === 1 && ch === '|' ) {
				save_param();
			} else if ( stack.length === 1 && ch === '=' && param_name === null ) {
				// only the first top-level "=" separates name from value
				param_name = param_content;
				param_content = '';
			} else {
				param_content += ch;
			}
			index += 1;
		}
	}

	// ran off the end of the text without closing the template
	return [ null, text.slice( start, index ) ];
}
/*
 * Converts text to title case.
 *
 * BOOK IV. THE INSTRUCTIONS OF I -> Book IV. The Instructions of I
 * THE LIFE OF HENRY VIII -> The Life of Henry VIII
 *
 * Takes care of:
 * - roman numerals (and "I"), with or without one trailing punctuation
 *   character, which are written in all caps
 * - always title-cases the first word after a word ending in "."
 * - otherwise title-cases words except a list of exceptions like 'a', 'of'
 *
 * The input is lower-cased first, so any existing capitalisation is lost.
 */
var toTitleCase = function ( str ) {
	// A well-formed roman numeral (additive forms such as "iiii" allowed),
	// optionally followed by one punctuation character. Requiring a
	// well-formed numeral keeps words such as "dim" or "civil" out.
	var roman_re = /^(?=[ivxlcdm])m{0,4}(?:cm|cd|d?c{0,4})(?:xc|xl|l?x{0,4})(?:ix|iv|v?i{0,4})\W?$/,
		// well-formed numerals that are far more likely to be ordinary words;
		// they only count as numerals when punctuation follows
		ambiguous = [ 'di', 'mi', 'mix' ],
		// if bookish title case, not all words are capped
		no_cap_words = [ 'a', 'an', 'be', 'the', 'of', 'on', 'to', 'at', 'this', 'than',
			'then', 'by', 'and', 'for', 'with', 'in'
		];

	var titler = function ( word ) {
		if ( word.length === 0 ) {
			return word;
		}
		return word[ 0 ].toUpperCase() + word.slice( 1 );
	};

	var all_capped = function ( word ) {
		return roman_re.test( word ) && ambiguous.indexOf( word ) === -1;
	};

	var words = str.toLowerCase().split( ' ' ),
		titled = [],
		new_sentence = true;

	for ( var i = 0; i < words.length; i++ ) {
		if ( all_capped( words[ i ] ) ) {
			// some words are all caps always
			titled.push( words[ i ].toUpperCase() );
		} else if ( new_sentence || no_cap_words.indexOf( words[ i ] ) === -1 ) {
			// new sentences and most words get title casing
			titled.push( titler( words[ i ] ) );
		} else {
			// lower
			titled.push( words[ i ] );
		}
		new_sentence = words[ i ].search( /\.$/ ) !== -1;
	}
	return titled.join( ' ' );
};
// Commons "Book" parameter names, keyed by logical field; empty until
// setup_extract_dict() has run
var extract_dict = {};
// ProofreadPage form field names, keyed by logical field; empty until
// setup_extract_dict() has run
var field_names = {};

/**
 * Point both lookup tables at the (possibly localised) tables held in
 * self.fill_index_data.
 */
function setup_extract_dict() {
	var localised = self.fill_index_data;

	extract_dict = localised.extract_dict;
	field_names = localised.field_names;
}
/**
 * Set the appropriate input field
 *
 * The value is tidied before it is written: a space before ";", ":" or ","
 * is removed (and a single space put after it), and runs of spaces are
 * collapsed. Nothing happens when the form has no matching field, or when
 * the content resolves to null/undefined.
 *
 * @param {string} idx the field index (a key of field_names)
 * @param {string|Promise} content the new content, or a Promise that resolves it
 */
function set_field( idx, content ) {
	// this resolves with either the raw value, or the resolution of the Promise
	// eslint-disable-next-line compat/compat
	Promise.resolve( content ).then( function ( content_value ) {
		if ( content_value === undefined || content_value === null ) {
			return;
		}

		var tidied = String( content_value )
			// every separator, not only the first one
			.replace( / ([;:,]) ?/g, '$1 ' )
			// fix any sneaky double spaces
			.replace( / +/g, ' ' );

		var f = document.getElementsByName( 'wpprpindex-' + field_names[ idx ] )[ 0 ];

		if ( f ) {
			f.value = tidied;
		}
	} ).catch( function ( error ) {
		// e.g. a failed author lookup: leave the field alone, but say why
		console.error( 'Fill Index: could not set field "' + idx + '"', error );
	} );
}
/**
 * Ask Wikidata for the title of the page on this wiki (per wgWikiID) that is
 * linked to the given item.
 *
 * @param {string} qid Wikidata item ID, e.g. "Q42"
 * @return {Promise} resolves with the sitelink title; rejects when the
 *  request fails, or with an Error when the item is unknown or has no
 *  sitelink for this wiki
 */
function get_wd_author( qid ) {
	// eslint-disable-next-line compat/compat
	return new Promise( function ( resolve, reject ) {
		$.ajax( {
			url: '//wikidata.org/w/api.php',
			data: {
				format: 'json',
				action: 'wbgetentities',
				ids: qid,
				props: 'sitelinks'
			},
			dataType: 'jsonp',
			cache: true,
			success: function ( data ) {
				var wiki = mw.config.get( 'wgWikiID' ),
					entity = data && data.entities && data.entities[ qid ],
					sitelink = entity && entity.sitelinks && entity.sitelinks[ wiki ];

				// Settle the promise in every case: throwing from this callback
				// would leave it (and anything waiting on it) pending forever
				if ( sitelink && sitelink.title ) {
					resolve( sitelink.title );
				} else {
					reject( new Error(
						'No ' + wiki + ' sitelink found for Wikidata item ' + qid
					) );
				}
			},
			error: function ( error ) {
				reject( error );
			}
		} );
	} );
}
// returns a promise that resolves the author
function process_author( str ) {
str = str.replace( /^[*:][ ]*/, '' );
str = str.trim();
var author_promise;
if ( str.match( /Q[0-9]+/ ) ) {
author_promise = get_wd_author( str );
} else {
// strip dates - these are nearly always not needed
str = str.replace( /(?:, )?(?:(?:ca\.|fl\.) )?(\(?\d+-\d+\)?).?$/, '' );
// strip birth date
str = str.replace( /(?:, )(?:b\.|d\.) +\d{3,4}$/, '' );
// strip initial expansions
str = str.replace( /(?:[A-Z]. ?)+ \((.*)\)/, '$1' );
str = str.replace( /, (Sir|Lord)$/, '' );
// Last, First -> First Last
str = str.replace( /^([^,]+), ([^,]+)$/, '$2 $1' );
// Fix initials without dots
str = str.replace( / ([A-Z]) /g, ' $1. ' );
// Fix bogus fullstops
// str = str.replace(/(?<!Jr|Sr)\.$/, "");
// just resolve right now
author_promise = Promise.resolve( str );
}
return author_promise.then( function ( author ) {
// prevent the pipe trick triggering on the JS
// eslint-disable-next-line no-useless-concat
return '[' + '[' + self.fill_index_data.ns_author_name + ':' + author + '|]]';
} );
}
/**
 * Convert the content of an author-like template parameter into a
 * comma-separated list of author links.
 *
 * Creator templates are unwrapped to their name or Wikidata ID, then every
 * non-blank line is handed to process_author().
 *
 * @param {string} str raw parameter value
 * @return {Promise} resolves with the links joined by ", "
 */
function process_authors( str ) {
	// {{Creator:Some Name}}
	var creator_by_name = /{{[ ]*[Cc]reator[ ]*:[ ]*(.*?)[ ]*}}/g,
		// {{creator|wikidata=Q123}}
		// TODO: fix wikidata here
		creator_by_qid = /{{[ ]*[Cc]reator[ ]*\|[ ]*[Ww]ikidata[ ]*=[ ]*(Q[0-9]*)}}/g,
		lines = str
			.replace( creator_by_name, '$1\n' )
			.replace( creator_by_qid, '$1\n' )
			.split( '\n' ),
		pending = [];

	// one promise per non-blank line, in order
	for ( var k = 0; k < lines.length; k += 1 ) {
		if ( lines[ k ].trim() !== '' ) {
			pending.push( process_author( lines[ k ] ) );
		}
	}

	// eslint-disable-next-line compat/compat
	return Promise.all( pending ).then( function ( links ) {
		return links.join( ', ' );
	} );
}
/**
 * Split a Commons "Publisher" value that also holds the place of
 * publication.
 *
 * - "City: Publisher" - everything before the first colon is the city
 * - "City, Publisher" / "City; Publisher" - when the first part is a place
 * - "Publisher, City" - when the last part is a place
 *
 * A part counts as a place when it names at least one well-known publishing
 * city and, once those names are removed, contains no other capitalised
 * word. So "London and New York" is a place but "Oxford University Press"
 * is not.
 *
 * @param {string} str the publisher value
 * @return {string[]} [ publisher, city ], both trimmed; city is '' when no
 *  place was recognised
 */
function split_city_publisher( str ) {
	// most books are published in a few cities
	var city_names = [ 'London', 'Edinburgh', 'Oxford', 'Cambridge',
			'New York', 'Boston', 'Philadelphia', 'Washington D\\.? ?C\\.?',
			'Paris',
			'Berlin', 'Stuttgart', 'Jena',
			'Hong Kong', 'Shanghai',
			'Calcutta', 'Bombay', 'Delhi' ],
		city_re = new RegExp( city_names.join( '|' ), 'g' ),
		city = '',
		publisher = str,
		parts,
		last;

	function is_place( candidate ) {
		var rest = candidate.replace( city_re, '' );
		// something was removed, and no upper-case letter is left over
		return rest !== candidate && rest === rest.toLowerCase();
	}

	if ( str.indexOf( ':' ) > -1 ) {
		// a colon: assume this is a city: publisher
		parts = str.split( ':' );
		city = parts[ 0 ];
		publisher = parts.slice( 1 ).join( ':' );
	} else {
		parts = str.split( /[,;] / );
		if ( parts.length > 1 ) {
			last = parts.length - 1;
			if ( is_place( parts[ 0 ] ) ) {
				city = parts[ 0 ];
				publisher = parts.slice( 1 ).join( ', ' );
			} else if ( is_place( parts[ last ] ) ) {
				city = parts[ last ];
				publisher = parts.slice( 0, last ).join( ', ' );
			}
		}
	}
	return [ publisher.trim(), city.trim() ];
}
/**
 * Split a Commons "Volume" value into the volume itself and an optional
 * description.
 *
 * The description is a trailing "(Foo)" or ": Foo". A bare number, optionally
 * prefixed with "v." or "vol.", is rewritten as "Volume <number>".
 *
 * @param {string} v the volume value
 * @return {string[]} [ volume, description ]; description is '' when there
 *  is none
 */
function processVolume( v ) {
	// first, strip off either : Foo or (Foo):
	var match = v.match( /(.*?) *(?:\((.*)\)|: *(.*))?$/ ),
		vol = v,
		v_desc = '';
	if ( match ) {
		vol = match[ 1 ];
		// group 2 is the "(Foo)" form, group 3 the ": Foo" form
		v_desc = match[ 2 ] || match[ 3 ] || '';
	}
	// Add "Volume " if it looks like we need it
	vol = vol.replace( /^(?:(?:vol|v)\. ?)?([-0-9]+)$/i, 'Volume $1' );
	return [ vol, v_desc ];
}
/**
 * Unwrap the first {{City|...}} template in the value, leaving just the
 * (trimmed) name it contains.
 *
 * @param {string} c the city value
 * @return {string} the value with the first City template unwrapped
 */
function processCity( c ) {
	var city_template = /\{\{ *City *\| *(.*?) *\}\}/i;

	return c.replace( city_template, function ( whole, place ) {
		return place;
	} );
}
/**
 * Fill the Index form from the result of a "revisions" API query for the
 * file description page.
 *
 * For every existing page in the result the "Book" template is parsed and its
 * parameters are copied into the form via set_field(). The "Source" field is
 * always set from the file extension, even when no Book template is found.
 *
 * @param {Object} data API response (format=json, prop=revisions,
 *  rvprop=content)
 * @return {boolean} true when at least one existing page with content was
 *  processed, false otherwise (missing page or unusable response)
 */
function extract_content( data ) {
	var importationDone = false;

	// The value of one parameter name, or of the first of several candidate
	// names that has a non-empty value
	function first_value( params, names ) {
		var value, i;
		if ( typeof names === 'string' ) {
			return params[ names ];
		}
		for ( i = 0; i < names.length; i++ ) {
			value = params[ names[ i ] ];
			if ( value !== undefined && value.length > 0 ) {
				break;
			}
		}
		return value;
	}

	// Copy every known template parameter into its form field
	function fill_fields( params ) {
		var title = params[ extract_dict.Title ],
			template_content,
			pub_city,
			v,
			v_field;

		if ( title ) {
			title = toTitleCase( title );
			set_field( 'Title', "''[[" + title + "]]''" );
		}

		for ( var idx in extract_dict ) {
			template_content = first_value( params, extract_dict[ idx ] );
			if ( template_content === undefined || template_content.length === 0 ) {
				continue;
			}
			switch ( idx ) {
				case 'Editor':
				case 'Author':
				case 'Translator':
				case 'Illustrator':
					set_field( idx, process_authors( template_content ) );
					break;
				case 'Publisher':
					// it is very common for the Commons publisher field
					// to contain the location
					pub_city = split_city_publisher( template_content );
					set_field( 'Publisher', pub_city[ 0 ] );
					if ( pub_city[ 1 ].length > 0 ) {
						set_field( 'City', pub_city[ 1 ] );
					}
					break;
				case 'Volume':
					v = processVolume( template_content );
					if ( title ) {
						// link to the volume as a subpage of the work
						v_field = '[[' + title +
							'/' + v[ 0 ] + '|' + v[ 0 ] + ']]';
					} else {
						// fallback
						v_field = v[ 0 ];
					}
					if ( v[ 1 ] ) {
						v_field += ' (' + v[ 1 ] + ')';
					}
					set_field( idx, v_field );
					break;
				case 'Title':
					// already handled above
					break;
				case 'City':
					set_field( idx, processCity( template_content ) );
					break;
				default:
					set_field( idx, template_content );
			}
		}
	}

	// "The Foo" sorts as "Foo, The"; titles without a leading article get no key
	function fill_sort_key( params ) {
		var skTitle = params[ extract_dict.Title ],
			titlewords;

		if ( skTitle === undefined ) {
			return;
		}
		titlewords = skTitle.split( ' ' );
		if ( [ 'The', 'A', 'An', 'Of' ].indexOf( titlewords[ 0 ] ) >= 0 ) {
			skTitle = titlewords.slice( 1 ).join( ' ' ) + ', ' + titlewords[ 0 ];
			skTitle = skTitle[ 0 ].toUpperCase() + skTitle.slice( 1 );
			set_field( 'Key', skTitle );
		}
	}

	if ( !data || !data.query || !data.query.pages ) {
		// e.g. an API error response: nothing to import
		return false;
	}

	// until Object entries is allowed
	// eslint-disable-next-line no-jquery/no-each-util
	$.each( data.query.pages, function ( ids, page ) {
		// missing pages are reported under a negative id
		if ( ids < 0 ) {
			return;
		}
		if ( !page.revisions || !page.revisions.length ||
			typeof page.revisions[ 0 ][ '*' ] !== 'string' ) {
			return;
		}

		var params = parse_template( page.revisions[ 0 ][ '*' ], 'Book' )[ 0 ];

		if ( params === null ) {
			console.error( 'Failed to parse Book template' );
		} else {
			fill_fields( params );
		}

		// set the file type selector
		set_field( 'Source', mw.config.get( 'wgTitle' ).split( '.' ).slice( -1 )[ 0 ] );

		// set the sort key (needs the template; skipped when there is none)
		if ( params !== null ) {
			fill_sort_key( params );
		}

		importationDone = true;
	} );
	return importationDone;
}
/**
 * Handle the Commons API response: import from it when possible, otherwise
 * query the file description page on the local wiki and import from that.
 *
 * @param {Object} data API response from Commons
 */
function common_content( data ) {
	if ( extract_content( data ) ) {
		// Commons had what we needed
		return;
	}

	var local_api = mw.util.wikiScript( 'api' ),
		revision_query = {
			format: 'json',
			action: 'query',
			prop: 'revisions',
			rvprop: 'content',
			titles: 'File:' + mw.config.get( 'wgTitle' )
		};

	$.ajax( { url: local_api, data: revision_query } ).done( extract_content );
}
/**
 * Start an import: fetch the wikitext of the Commons file description page
 * that shares this Index page's title (via JSONP) and pass the response to
 * common_content().
 */
function do_extraction() {
	var revision_query = {
			format: 'json',
			action: 'query',
			prop: 'revisions',
			rvprop: 'content',
			titles: 'File:' + mw.config.get( 'wgTitle' )
		},
		request = $.ajax( {
			url: '//commons.wikimedia.org/w/api.php',
			data: revision_query,
			dataType: 'jsonp'
		} );

	request.done( common_content );
}
/**
 * Initialise the gadget on an Index edit page.
 *
 * When the ".mw-newarticletext" notice is present the form is filled
 * straight away; otherwise a "Re-fill index" toolbox link is added so the
 * import only runs on request.
 */
function setup() {
	setup_extract_dict();

	// eslint-disable-next-line no-jquery/no-global-selector
	var creating_page = $( '.mw-newarticletext' ).length !== 0;

	if ( creating_page ) {
		do_extraction();
		return;
	}

	// Portlet link to re-extract
	var refill_link = mw.util.addPortletLink(
		'p-tb',
		'#',
		'Re-fill index',
		't-refill-index',
		'Re-import this index page\'s data from the Commons file'
	);

	$( refill_link ).on( 'click', function ( event ) {
		event.preventDefault();
		do_extraction();
	} );
}
/* Localisation section, you can provide your own data before loading this script to
 * change the script behavior
 *
 * Every setting that is missing (or falsy) in self.fill_index_data is filled
 * in with the default below; settings already provided are left alone.
 */
if ( !self.fill_index_data ) {
	self.fill_index_data = {};
}
( function ( settings, defaults ) {
	Object.keys( defaults ).forEach( function ( key ) {
		if ( !settings[ key ] ) {
			settings[ key ] = defaults[ key ];
		}
	} );
}( self.fill_index_data, {
	// Name of the author namespace on this wiki
	ns_author_name: 'Author',
	// Commons Book template field names
	// Should not need to be internationalised
	extract_dict: {
		Editor: 'Editor',
		Publisher: 'Publisher',
		Author: 'Author',
		Translator: 'Translator',
		Volume: 'Volume',
		Illustrator: 'Illustrator',
		'Image page': 'Image page',
		Title: 'Title',
		Date: [ 'Publication date', 'Date' ],
		City: 'City',
		Source: 'Source',
		LCCN: 'LCCN',
		OCLC: 'OCLC'
	},
	// Proofread page field names
	field_names: {
		Editor: 'Editor',
		Publisher: 'Publisher',
		Author: 'Author',
		Translator: 'Translator',
		Volume: 'Volume',
		Illustrator: 'Illustrator',
		'Image page': 'Image',
		Title: 'Title',
		Date: 'Year',
		City: 'Address',
		Source: 'Source',
		LCCN: 'LCCN',
		OCLC: 'OCLC',
		Key: 'Key'
	}
} ) );
/* end of localisation section */
// Once the DOM is ready, start the gadget - but only while editing a page in
// the Index namespace
$( function () {
	if ( mw.config.get( 'wgCanonicalNamespace' ) !== 'Index' ) {
		return;
	}
	if ( mw.config.get( 'wgAction' ) !== 'edit' ) {
		return;
	}
	setup();
} );
// eslint-disable-next-line no-undef
}( mediaWiki, jQuery, Promise ) );