#
# You can use this CREATE statement for the "url" table instead of the
# default one. This structure is useful for huge "cache mode" databases
# with several million URLs.
#
# New features of this scheme:
#
# * Support for MySQL RAID to break the 2/4G data file size limit.
# * Relatively small "url.MYI" index file size:
#   there is no unique index on the "url" field.
#   NOTE(review): the statement below still declares UNIQUE url (url),
#   which contradicts this bullet -- confirm which one is intended.
# * Quick search for expired documents at indexing time using the
#   "key_next_index_time" index. It significantly improves
#   indexing speed for big databases.
# * A UNIQUE rec_id is generated in the indexer using CRC32(url).
# * It turns on large-file MySQL support for the "url" table.
#
# Disadvantage:
# * This scheme will probably lose some documents, because the CRC32
#   algorithm used for rec_id generation can give the same value for
#   different URLs. According to our tests it gives approximately
#   100 URL pairs with the same CRC32 within 3.5 million unique URLs.
#   It means that 0.0028% of documents will be lost.
#
# Requires:
# * Specify "--with-raid" and omit "--disable-large-files" when
#   installing MySQL.
# * Use the "UseCRC32UrlID yes" command in your indexer.conf.
#

# IF EXISTS keeps the script usable on a fresh database, where a plain
# DROP TABLE would fail because the table has not been created yet.
DROP TABLE IF EXISTS url;

CREATE TABLE url (
  # Supplied by the indexer as CRC32(url) (see header), so no auto_increment.
  rec_id          int(11)     DEFAULT '0' NOT NULL,
  status          int(11)     DEFAULT '0' NOT NULL,
  docsize         int(11)     DEFAULT '0' NOT NULL,
  next_index_time INT         NOT NULL,
  last_mod_time   INT         NOT NULL,
  referrer        int(11)     DEFAULT '0' NOT NULL,
  hops            int(11)     DEFAULT '0' NOT NULL,
  crc32           int(11)     DEFAULT '0' NOT NULL,
  seed            smallint(6) DEFAULT '0' NOT NULL,
  bad_since_time  INT         NOT NULL,
  # site_id and server_id are the only nullable columns in this table.
  site_id         int(11),
  server_id       int(11),
  pop_rank        float       DEFAULT 0 NOT NULL,
  url             char(128) binary DEFAULT '' NOT NULL,

  PRIMARY KEY (rec_id),
  UNIQUE url (url),
  KEY key_crc (crc32),
  KEY key_seed (seed),
  KEY key_referrer (referrer),
  KEY key_bad_since_time (bad_since_time),
  # Used to find expired documents quickly at indexing time (see header).
  KEY key_next_index_time (next_index_time),
  KEY key_site_id (site_id)
)
  # RAID_* options need a MySQL server built with "--with-raid" (see header).
  RAID_TYPE=RAID0
  RAID_CHUNKS=16
  RAID_CHUNKSIZE=256
  # Size hints for the table; presumably what enables the large-file
  # support mentioned in the header -- verify against the MySQL manual.
  MAX_ROWS=100000000
  AVG_ROW_LENGTH=512
;