这是indexloc提供的服务,不要输入任何密码
Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ coverage
index
*.gem
.bundle
*~
7 changes: 6 additions & 1 deletion README.rdoc
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,11 @@ acts_as_indexed plugin folder. Then point your web browser at
Alternatively, you can view the rdoc documentation
online[http://rdoc.info/projects/dougal/acts_as_indexed/].

== Stemming

This branch has basic support for stemming using the Porter stemmer.
Quoted portions of queries will be searched exactly (without
stemming).

== Problems, Comments, Suggestions?

Expand All @@ -189,7 +194,7 @@ All of the above are most welcome. mailto:dougal.s@gmail.com
* Ben Anderson
* Theron Toomey
* Uģis Ozols

* David Turner

== Future Releases

Expand Down
5 changes: 5 additions & 0 deletions lib/acts_as_indexed/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ class Configuration
# Default is 3.
attr_reader :min_word_size

# The regular expression which defines how words are separated;
# words are also separated on word-nonword boundaries
attr_reader :space_regexp

# Proc that allows you to turn on or off index for a record.
# Useful if you don't want an object to be placed in the index, such as a
# draft post.
Expand All @@ -42,6 +46,7 @@ def initialize
@if_proc = if_proc
@case_sensitive = false
@disable_auto_indexing = false
@space_regexp = /[\s\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]+/
end

# Since we cannot expect Rails to be available on load, it is best to put
Expand Down
128 changes: 91 additions & 37 deletions lib/acts_as_indexed/search_atom.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@
module ActsAsIndexed #:nodoc:
class SearchAtom

# Contains a hash of records.
# { 'record_id' => [pos1, pos2, pos] }
# Contains a hash keyed by words having a common stem;
# each value is another hash from record ids to word numbers
# { 'foo' => {record_id => [pos1, pos2, ...], }
# 'fooing' => {record_id => [pos3, pos4, ...] }
# }
#--
# Weighting:
# http://www.perlmonks.com/index.pl?node_id=27509
Expand All @@ -22,68 +25,114 @@ def initialize(records={})

# Returns true if +record_id+ is stored under at least one token of this
# atom, false otherwise (including when the atom is empty).
def include_record?(record_id)
  # @records is keyed by token, each value being { record_id => [positions] },
  # so the id has to be looked up inside every per-token hash; a top-level
  # key lookup would compare the id against tokens.
  @records.values.any? { |by_record| by_record.member?(record_id) }
end

# Adds +record_id+ to the stored records.
def add_record(record_id)
@records[record_id] = [] unless include_record?(record_id)
# Returns true if +token+ has an entry in this atom.
def include_token?(token)
  @records.key?(token)
end

# Adds +pos+ to the array of positions for +record_id+.
def add_position(record_id, pos)
add_record(record_id)
@records[record_id] << pos
# Registers +token+ in this atom with an empty record hash.
# An already-present token is left untouched (and nil is returned).
def add_token(token)
  return if @records[token]

  @records[token] = {}
end

# Makes sure +record_id+ has a position list under +token+, creating the
# token entry and an empty list when needed. Returns the position list.
def add_record_token(token, record_id)
  add_token(token)
  positions_by_record = @records[token]
  positions_by_record[record_id] ||= []
end

# Appends +pos+ to the positions recorded for +record_id+ under +token+,
# creating the token and record entries first if they do not exist yet.
# Returns the updated position list.
def add_position(record_id, token, pos)
  add_record_token(token, record_id)
  positions = @records[token][record_id]
  positions.push(pos)
end

# Collapses the per-token structure into { record_id => [positions] },
# concatenating the positions of every token in token order.
# The stored position arrays are not modified.
def flat_records
  @records.values.inject({}) do |merged, by_record|
    by_record.each do |record_id, positions|
      merged[record_id] = (merged[record_id] || []) + positions
    end
    merged
  end
end

# Returns all record IDs stored in this Atom, across every token.
# Each ID appears once, in first-seen order; an empty atom yields [].
def record_ids
  # Array#| keeps the result unique, so a record indexed under several
  # tokens of the same stem is not reported once per token.
  @records.values.inject([]) { |ids, by_record| ids | by_record.keys }
end

# Returns an array of positions for +record_id+ stored in this Atom.
def positions(record_id)
@records[record_id]
# Returns every position recorded for +record_id+, concatenated across all
# tokens in token order. Returns [] when the record is unknown or the atom
# is empty.
def all_positions(record_id)
  @records.values.inject([]) do |positions, by_record|
    # Tokens that never saw this record have no entry; skip them instead of
    # trying to concatenate nil.
    positions + (by_record[record_id] || [])
  end
end

# Returns the hash of record id -> array of positions stored for +token+
# in this Atom. The result is whatever @records[token] yields, so a token
# with no entry gives the hash's default (normally nil).
def records_by_token(token)
@records[token]
end

# Removes +record_id+ from this Atom by deleting its entry under every token.
# Token entries themselves are kept, even if they end up empty.
def remove_record(record_id)
  # Only the per-token hashes are keyed by record id; the top-level hash is
  # keyed by token and must not be touched here.
  @records.values.each { |by_record| by_record.delete(record_id) }
end

# Creates a new SearchAtom with the combined records from self and other.
# Tokens found on only one side are carried over as they are; tokens found
# on both sides have their per-record hashes merged, with position lists
# concatenated (self first) for records present on both sides.
def +(other)
  combined = @records.merge(other.records) do |_token, mine, theirs|
    # Values are hashes of record_id => positions, so they are merged
    # key by key; only the innermost arrays are joined with +.
    mine.merge(theirs) { |_record_id, left, right| left + right }
  end
  SearchAtom.new(combined)
end

# Returns a new SearchAtom restricted to the entry stored for exactly
# +token+; the result is empty when the token is not present.
def exact(token)
  matching = {}
  @records.each do |stored_token, by_record|
    matching[stored_token] = by_record if stored_token == token
  end
  SearchAtom.new(matching)
end


# Creates a new SearchAtom with records in other removed from self.
# Removal is done token by token: under each token of self, the record ids
# that other stores under the same token are dropped. Tokens that other does
# not have are kept unchanged, and tokens left with no records are omitted.
# NOTE(review): records that other holds only under a different token are
# not removed — confirm this per-token matching is what callers expect.
def -(other)
  remaining = {}
  @records.each do |token, records_for_token|
    excluded = other.records[token] || {}
    kept = records_for_token.reject { |id, _positions| excluded.include?(id) }
    # An Integer is always truthy in Ruby, so emptiness must be tested
    # explicitly rather than through the size.
    remaining[token] = kept unless kept.empty?
  end
  SearchAtom.new(remaining)
end

# Returns at atom containing the records and positions of +self+ preceded by +former+
# "former latter" or "big dog" where "big" is the former and "dog" is the latter.
# Returns an atom containing the records and positions of +self+
# preceded by +former+ "former latter" or "big dog" where "big" is
# the former and "dog" is the latter.

def preceded_by(former)
matches = SearchAtom.new
latter = {}
former.record_ids.each do |rid|
latter[rid] = @records[rid] if @records[rid]
end
# Iterate over each record in latter.
latter.each do |record_id,pos|

# Iterate over each position.
pos.each do |p|
# Check if previous position is in former.
if former.include_position?(record_id,p-1)
matches.add_record(record_id) unless matches.include_record?(record_id)
matches.add_position(record_id,p)

for former_token, former_records in former.records
for latter_token, latter_records in @records
for latter_record, latter_positions in latter_records
next unless former_records.member? latter_record

#this record appears in both
for former_pos in former_records[latter_record]
if latter_positions.member? former_pos + 1
matches.add_position(latter_record, latter_token, former_pos + 1)
end
end
end
end

end
matches
end
Expand All @@ -92,7 +141,8 @@ def preceded_by(former)
# atom.
def weightings(records_size)
out = {}
@records.each do |r_id, pos|
flat = flat_records
flat.each do |r_id, pos|

# Fixes a bug when the records_size is zero. i.e. The only record
# containing the word has been deleted.
Expand All @@ -104,15 +154,19 @@ def weightings(records_size)
# weighting = frequency * log (records.size / records_with_atom)
## parndt 2010/05/03 changed to records_size.to_f to avoid -Infinity Errno::ERANGE exceptions
## which would happen for example Math.log(1 / 20) == -Infinity but Math.log(1.0 / 20) == -2.99573227355399
out[r_id] = pos.size * Math.log(records_size.to_f / @records.size)
out[r_id] = pos.size * Math.log(records_size.to_f / flat_records.size)
end
out
end

protected

# Returns true if +record_id+ has +pos+ recorded under any token of this
# atom, false otherwise.
def include_position?(record_id, pos)
  # Iterate over the per-token hashes (the values); iterating the hash itself
  # would yield [token, hash] pairs and never reach the position lists.
  @records.values.any? do |by_record|
    positions = by_record[record_id]
    positions ? positions.include?(pos) : false
  end
end

end
Expand Down
Loading