import array
import math
import operator
import os
from collections import defaultdict, OrderedDict

import geohash
import six
import ujson as json
from leveldb import LevelDB

from geodata.distance.haversine import haversine_distance


class PointIndex(object):
    """Geohash-bucketed index of (lat, lon) points with per-point properties.

    Coordinates are kept in a flat ``array.array('d')`` laid out as
    ``[lat0, lon0, lat1, lon1, ...]``; point ``i`` therefore lives at
    offsets ``2 * i`` and ``2 * i + 1``. Each point's properties are stored
    as a JSON string in a LevelDB database under the key ``props:<i>``.
    The index maps a geohash prefix to the list of point ids filed under it.
    """

    include_only_properties = None
    persistent_index = False
    cache_size = 0

    POINTS_DB_DIR = 'points'

    # Number of geohash characters used as the bucket key.
    GEOHASH_PRECISION = 7
    PROPS_FILENAME = 'properties.json'
    POINTS_FILENAME = 'points.json'
    INDEX_FILENAME = 'index.json'

    def __init__(self, index=None, save_dir=None,
                 points=None, points_path=None,
                 points_db=None, points_db_path=None,
                 index_path=None,
                 include_only_properties=None,
                 precision=GEOHASH_PRECISION):
        """Create an index, optionally from pre-loaded components.

        :param index: existing mapping of geohash -> list of point ids;
            a fresh ``defaultdict(list)`` is used when falsy.
        :param save_dir: directory used to derive default file paths;
            the current directory is used when not given.
        :param points: existing flat coordinate array; a new
            ``array.array('d')`` is used when falsy.
        :param points_path: where ``save_points`` writes the coordinates.
        :param points_db: an open LevelDB handle for the properties.
        :param points_db_path: LevelDB directory opened when ``points_db``
            is not supplied.
        :param index_path: where ``save_index`` writes the index.
        :param include_only_properties: container of property names to
            keep in ``add_point``; ignored unless it supports ``in``.
        :param precision: geohash prefix length used for bucket keys.
        """
        self.save_dir = save_dir or None
        base_dir = save_dir or '.'

        if include_only_properties and hasattr(include_only_properties, '__contains__'):
            self.include_only_properties = include_only_properties

        if not index_path:
            index_path = os.path.join(base_dir, self.INDEX_FILENAME)
        self.index_path = index_path

        if not index:
            self.index = defaultdict(list)
        else:
            self.index = index

        if not points_path:
            points_path = os.path.join(base_dir, self.POINTS_FILENAME)
        self.points_path = points_path

        if not points:
            self.points = array.array('d')
        else:
            self.points = points

        if not points_db_path:
            points_db_path = os.path.join(base_dir, self.POINTS_DB_DIR)

        if not points_db:
            self.points_db = LevelDB(points_db_path)
        else:
            self.points_db = points_db

        self.precision = precision

        # Id that will be assigned to the next point added.
        self.i = 0

    def index_point(self, lat, lon):
        """File the current point id under its geohash cell and all neighbors.

        Appends the coordinates to ``self.points`` but does NOT advance
        ``self.i``; ``add_point`` does that after storing the properties.
        """
        code = geohash.encode(lat, lon)[:self.precision]

        for key in [code] + geohash.neighbors(code):
            # setdefault rather than self.index[key]: an index restored by
            # load() is a plain dict, where a missing key would raise KeyError.
            self.index.setdefault(key, []).append(self.i)
        self.points.extend([lat, lon])

    def add_point(self, lat, lon, properties, cache=False, include_only_properties=None):
        """Index a point and persist its properties to the points database.

        :param lat: latitude of the point.
        :param lon: longitude of the point.
        :param properties: dict of properties, stored as JSON.
        :param cache: accepted for interface compatibility; not used here.
        :param include_only_properties: container of keys to keep; falls
            back to the instance-level setting when ``None``.
        """
        if include_only_properties is None and self.include_only_properties:
            include_only_properties = self.include_only_properties
        if include_only_properties is not None:
            properties = {k: v for k, v in six.iteritems(properties)
                          if k in include_only_properties}

        self.index_point(lat, lon)
        self.points_db.Put(self.properties_key(self.i), json.dumps(properties))
        self.i += 1

    def load_properties(self, filename):
        """Restore the point count and precision from a JSON properties file.

        Keys missing from the file leave the current values untouched.
        """
        with open(filename) as f:
            properties = json.load(f)
        self.i = int(properties.get('num_points', self.i))
        self.precision = int(properties.get('precision', self.precision))

    def save_properties(self, out_filename):
        """Write the point count (as a string) and precision to a JSON file."""
        with open(out_filename, 'w') as out:
            json.dump({'num_points': str(self.i),
                       'precision': self.precision}, out)

    def save_index(self):
        """Write the geohash index as JSON to ``self.index_path``."""
        if not self.index_path:
            self.index_path = os.path.join(self.save_dir or '.', self.INDEX_FILENAME)
        with open(self.index_path, 'w') as f:
            json.dump(self.index, f)

    @classmethod
    def load_index(cls, d, index_name=None):
        """Read the JSON index stored in directory ``d``.

        :param index_name: file name to read; defaults to ``INDEX_FILENAME``.
        """
        with open(os.path.join(d, index_name or cls.INDEX_FILENAME)) as f:
            return json.load(f)

    def save_points(self):
        """Write the flat coordinate array as JSON to ``self.points_path``."""
        with open(self.points_path, 'w') as f:
            json.dump(self.points, f)

    @classmethod
    def load_points(cls, d):
        """Read the coordinates stored in directory ``d`` into an ``array('d')``."""
        with open(os.path.join(d, cls.POINTS_FILENAME)) as f:
            return array.array('d', json.load(f))

    def properties_key(self, i):
        """Return the points-database key for point id ``i``."""
        return 'props:{}'.format(i)

    def get_properties(self, i):
        """Fetch and decode the stored properties of point id ``i``."""
        return json.loads(self.points_db.Get(self.properties_key(i)))

    def compact_points_db(self):
        """Ask LevelDB to compact the key range ``'\\x00'``..``'\\xff'``."""
        self.points_db.CompactRange('\x00', '\xff')

    def save(self):
        """Persist the index, points and properties, and compact the database.

        The properties file goes to ``save_dir``, or the current directory
        when no ``save_dir`` was configured (matching the other defaults).
        """
        self.save_index()
        self.save_points()
        self.compact_points_db()
        self.save_properties(os.path.join(self.save_dir or '.', self.PROPS_FILENAME))

    @classmethod
    def load(cls, d):
        """Rebuild an index from the files previously saved in directory ``d``."""
        index = cls.load_index(d)
        points = cls.load_points(d)
        points_db = LevelDB(os.path.join(d, cls.POINTS_DB_DIR))
        # save_dir=d so that a later save() writes back next to the database
        # instead of into the current working directory.
        point_index = cls(index=index, save_dir=d, points=points, points_db=points_db)
        point_index.load_properties(os.path.join(d, cls.PROPS_FILENAME))
        return point_index

    def __iter__(self):
        """Yield ``(properties, lat, lon)`` for every point in id order."""
        for i in six.moves.range(self.i):
            lat, lon = self.points[i * 2], self.points[i * 2 + 1]
            yield self.get_properties(i), lat, lon

    def __len__(self):
        """Return the number of points added so far."""
        return self.i

    def get_candidate_points(self, latitude, longitude):
        """Return point ids filed under the query's geohash cell or a neighbor.

        Ids are de-duplicated and kept in first-seen order (the query cell
        first, then each neighbor in the order ``geohash.neighbors`` returns).
        """
        code = geohash.encode(latitude, longitude)[:self.precision]
        # OrderedDict used as an insertion-ordered set.
        candidates = OrderedDict()

        for key in [code] + geohash.neighbors(code):
            candidates.update([(k, None) for k in self.index.get(key, [])])

        return list(candidates)

    def point_distances(self, latitude, longitude):
        """Return ``(id, lat, lon, distance)`` for every candidate point.

        ``distance`` is whatever ``haversine_distance`` returns for the
        query and the point (units are defined by that helper).
        """
        results = []
        for i in self.get_candidate_points(latitude, longitude):
            lat, lon = self.points[i * 2], self.points[i * 2 + 1]
            results.append((i, lat, lon,
                            haversine_distance(latitude, longitude, lat, lon)))
        return results

    def all_nearby_points(self, latitude, longitude):
        """Return the candidate tuples sorted by ascending distance."""
        distances = self.point_distances(latitude, longitude)
        if not distances:
            return []
        return sorted(distances, key=operator.itemgetter(-1))

    def points_with_properties(self, results):
        """Swap the id in each result tuple for the point's stored properties."""
        return [(self.get_properties(i), lat, lon, distance)
                for i, lat, lon, distance in results]

    def nearest_points(self, latitude, longitude):
        """Return ``(properties, lat, lon, distance)`` for all candidates, nearest first."""
        return self.points_with_properties(self.all_nearby_points(latitude, longitude))

    def nearest_n_points(self, latitude, longitude, n=2):
        """Return at most ``n`` nearest candidates as ``(properties, lat, lon, distance)``."""
        return self.points_with_properties(self.all_nearby_points(latitude, longitude)[:n])

    def nearest_point(self, latitude, longitude):
        """Return the single nearest candidate, or ``None`` when there is none."""
        distances = self.all_nearby_points(latitude, longitude)
        if not distances:
            return None
        return self.points_with_properties(distances[:1])[0]